You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2021/02/16 18:40:08 UTC

[nutch] branch master updated: NUTCH-2849 Replace remaining package.html files with package-info.java (#569)

This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 2fae4cd  NUTCH-2849 Replace remaining package.html files with package-info.java (#569)
2fae4cd is described below

commit 2fae4cde67a05cf1fa9ecdd6b6bd5307c0e46fe7
Author: Lewis John McGibbney <le...@gmail.com>
AuthorDate: Tue Feb 16 10:40:00 2021 -0800

    NUTCH-2849 Replace remaining package.html files with package-info.java (#569)
---
 build.xml                                          |  7 +++-
 .../org/apache/nutch/crawl/package-info.java}      |  8 ++--
 src/java/org/apache/nutch/crawl/package.html       |  5 ---
 .../org/apache/nutch/fetcher/package-info.java}    |  8 ++--
 src/java/org/apache/nutch/fetcher/package.html     |  5 ---
 .../org/apache/nutch/indexer/package-info.java}    | 16 ++++---
 src/java/org/apache/nutch/indexer/package.html     | 10 -----
 .../org/apache/nutch/metadata/package-info.java}   | 11 ++---
 src/java/org/apache/nutch/metadata/package.html    |  6 ---
 src/java/org/apache/nutch/plugin/package-info.java | 42 +++++++++++++++++++
 src/java/org/apache/nutch/plugin/package.html      | 40 ------------------
 .../apache/nutch/util/domain/package-info.java}    | 17 +++++---
 src/java/org/apache/nutch/util/domain/package.html | 14 -------
 .../org/creativecommons/nutch/package-info.java}   |  8 ++--
 .../java/org/creativecommons/nutch/package.html    |  5 ---
 .../apache/nutch/indexer/anchor/package-info.java} |  8 ++--
 .../org/apache/nutch/indexer/anchor/package.html   |  5 ---
 .../apache/nutch/indexer/basic/package-info.java}  | 10 ++---
 .../org/apache/nutch/indexer/basic/package.html    |  5 ---
 .../apache/nutch/indexer/more/package-info.java}   | 11 ++---
 .../org/apache/nutch/indexer/more/package.html     |  6 ---
 .../nutch/indexer/staticfield/package-info.java}   | 12 +++---
 .../apache/nutch/indexer/staticfield/package.html  |  5 ---
 .../apache/nutch/analysis/lang/package-info.java}  | 13 +++---
 .../org/apache/nutch/analysis/lang/package.html    |  6 ---
 .../nutch/protocol/http/api/package-info.java}     | 11 ++---
 .../apache/nutch/protocol/http/api/package.html    |  6 ---
 .../nutch/microformats/reltag/package-info.java}   | 11 ++---
 .../apache/nutch/microformats/reltag/package.html  |  8 ----
 .../org/apache/nutch/parse/html/package-info.java} | 11 ++---
 .../java/org/apache/nutch/parse/html/package.html  |  5 ---
 .../apache/nutch/protocol/file/package-info.java}  |  8 ++--
 .../org/apache/nutch/protocol/file/package.html    |  5 ---
 .../apache/nutch/protocol/ftp/package-info.java}   |  8 ++--
 .../org/apache/nutch/protocol/ftp/package.html     |  5 ---
 .../htmlunit/{package.html => package-info.java}   |  8 ++--
 .../apache/nutch/protocol/http/package-info.java}  |  8 ++--
 .../org/apache/nutch/protocol/http/package.html    |  5 ---
 .../nutch/protocol/httpclient/package-info.java}   | 15 ++++---
 .../apache/nutch/protocol/httpclient/package.html  |  9 ----
 .../interactiveselenium/package-info.java}         |  8 ++--
 .../protocol/interactiveselenium/package.html      |  5 ---
 .../nutch/protocol/selenium/package-info.java}     |  8 ++--
 .../apache/nutch/protocol/selenium/package.html    |  5 ---
 .../nutch/scoring/metadata/package-info.java       | 32 ++++++++++++++
 .../org/apache/nutch/scoring/metadata/package.html | 33 ---------------
 .../org/apache/nutch/collection/package-info.java  | 49 ++++++++++++++++++++++
 .../java/org/apache/nutch/collection/package.html  | 36 ----------------
 .../apache/nutch/indexer/tld/package-info.java}    |  8 ++--
 .../java/org/apache/nutch/indexer/tld/package.html |  5 ---
 .../apache/nutch/scoring/tld/package-info.java}    |  8 ++--
 .../java/org/apache/nutch/scoring/tld/package.html |  5 ---
 .../nutch/urlfilter/automaton/package-info.java}   | 12 +++---
 .../apache/nutch/urlfilter/automaton/package.html  |  9 ----
 .../nutch/urlfilter/prefix/package-info.java}      |  8 ++--
 .../org/apache/nutch/urlfilter/prefix/package.html |  5 ---
 .../nutch/urlfilter/regex/package-info.java}       | 10 ++---
 .../org/apache/nutch/urlfilter/regex/package.html  |  5 ---
 .../nutch/urlfilter/validator/package-info.java}   | 14 ++++---
 .../apache/nutch/urlfilter/validator/package.html  |  9 ----
 .../nutch/indexer/urlmeta/package-info.java}       | 16 ++++---
 .../org/apache/nutch/indexer/urlmeta/package.html  | 12 ------
 .../nutch/scoring/urlmeta/package-info.java}       | 15 ++++---
 .../org/apache/nutch/scoring/urlmeta/package.html  | 11 -----
 64 files changed, 292 insertions(+), 442 deletions(-)

diff --git a/build.xml b/build.xml
index ec003c3..dcb7b94 100644
--- a/build.xml
+++ b/build.xml
@@ -186,6 +186,7 @@
       doctitle="${name} ${version} API"
       bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
       failonerror="true"
+      failonwarning="true"
       >
       <arg value="${javadoc.proxy.host}"/>
       <arg value="${javadoc.proxy.port}"/>
@@ -269,9 +270,9 @@
 
       <link href="${javadoc.link.java}"/>
       <link href="${javadoc.link.hadoop}"/>
-      <link href="${javadoc.link.lucene.core}"/>
+      <!--link href="${javadoc.link.lucene.core}"/>
       <link href="${javadoc.link.lucene.analyzers-common}"/>
-      <link href="${javadoc.link.solr-solrj}"/>
+      <link href="${javadoc.link.solr-solrj}"/-->
 
       <classpath refid="classpath"/>
       <classpath>
@@ -718,6 +719,7 @@
       doctitle="${name} ${version} API"
       bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
       failonerror="true"
+      failonwarning="true"
       >
       <arg value="${javadoc.proxy.host}"/>
       <arg value="${javadoc.proxy.port}"/>
@@ -809,6 +811,7 @@
       <classpath>
         <fileset dir="${build.plugins}" >
           <include name="**/*.jar"/>
+          <exclude name="any23/javax.annotation-api*.jar"/>
         </fileset>
       </classpath>
 
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/java/org/apache/nutch/crawl/package-info.java
similarity index 87%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/java/org/apache/nutch/crawl/package-info.java
index 4181951..f7a529b 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/java/org/apache/nutch/crawl/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Crawl control code and tools to run the crawler. */
+package org.apache.nutch.crawl;
diff --git a/src/java/org/apache/nutch/crawl/package.html b/src/java/org/apache/nutch/crawl/package.html
deleted file mode 100644
index 05eeb50..0000000
--- a/src/java/org/apache/nutch/crawl/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-Crawl control code and tools to run the crawler.
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/java/org/apache/nutch/fetcher/package-info.java
similarity index 87%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/java/org/apache/nutch/fetcher/package-info.java
index 4181951..c06243c 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/java/org/apache/nutch/fetcher/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** The Nutch multi-threaded fetching module. */
+package org.apache.nutch.fetcher;
diff --git a/src/java/org/apache/nutch/fetcher/package.html b/src/java/org/apache/nutch/fetcher/package.html
deleted file mode 100644
index 9c843e0..0000000
--- a/src/java/org/apache/nutch/fetcher/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-The Nutch robot.
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/java/org/apache/nutch/indexer/package-info.java
similarity index 67%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/java/org/apache/nutch/indexer/package-info.java
index 4181951..2307dd9 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/java/org/apache/nutch/indexer/package-info.java
@@ -14,8 +14,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/**
+ * Index content, configure and run indexing and cleaning jobs to 
+ * add, update, and delete documents from an index. Two tasks are 
+ * delegated to plugins:
+ * <ul>
+ *  <li>indexing filters, which fill index fields of each document</li>
+ *  <li>index writer plugins, which send documents to index back-ends (Solr, etc.).</li>
+ * </ul>
+ */
+package org.apache.nutch.indexer;
diff --git a/src/java/org/apache/nutch/indexer/package.html b/src/java/org/apache/nutch/indexer/package.html
deleted file mode 100644
index 825eaae..0000000
--- a/src/java/org/apache/nutch/indexer/package.html
+++ /dev/null
@@ -1,10 +0,0 @@
-<html>
-<body>
-Index content, configure and run indexing and cleaning jobs to 
-add, update, and delete documents from an index. Two tasks are
-delegated to plugins:
-<ul>
-<li>indexing filters fill index fields of each documents</li>
-<li>index writer plugins send documents to index back-ends (Solr, etc.).
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/java/org/apache/nutch/metadata/package-info.java
similarity index 85%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/java/org/apache/nutch/metadata/package-info.java
index 4181951..b64dca3 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/java/org/apache/nutch/metadata/package-info.java
@@ -14,8 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ *  A Multi-valued Metadata container, and set
+ * of constant fields for Nutch Metadata.
+ */
+package org.apache.nutch.metadata;
diff --git a/src/java/org/apache/nutch/metadata/package.html b/src/java/org/apache/nutch/metadata/package.html
deleted file mode 100644
index 53281bb..0000000
--- a/src/java/org/apache/nutch/metadata/package.html
+++ /dev/null
@@ -1,6 +0,0 @@
-<html>
-<body>
-A Multi-valued Metadata container, and set
-of constant fields for Nutch Metadata.
-</body>
-</html>
diff --git a/src/java/org/apache/nutch/plugin/package-info.java b/src/java/org/apache/nutch/plugin/package-info.java
new file mode 100644
index 0000000..f7d95f9
--- /dev/null
+++ b/src/java/org/apache/nutch/plugin/package-info.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** 
+ * The Nutch {@link org.apache.nutch.plugin.Pluggable Plugin} System.
+ * <p><b>The Nutch Plugin System provides a way to extend nutch functionality</b>.
+ * A large part of the functionality of Nutch is provided by plugins:
+ * All of the parsing, indexing and searching that nutch does is actually 
+ * accomplished by various plugins.</p>
+ * <p>In writing a plugin, you're actually providing one or more extensions 
+ * of the existing extension-points (<i>hooks</i>). The core Nutch extension-points 
+ * are themselves defined in a plugin, the <code>nutch-extensionpoints</code> plugin.
+ * Each extension-point defines an interface that must be implemented by the 
+ * extension. The core extension-points and extensions available in Nutch are
+ * listed in the {@link org.apache.nutch.plugin.Pluggable} interface.</p>
+ * @see <a href="./doc-files/plugin.dtd">Nutch plugin manifest DTD</a>
+ * @see <a href="https://cwiki.apache.org/confluence/display/NUTCH/PluginCentral">Plugin Central</a>
+ * @see <a href="https://cwiki.apache.org/confluence/display/NUTCH/AboutPlugins">About Plugins</a>
+ * @see <a href="https://cwiki.apache.org/confluence/display/NUTCH/WhyNutchHasAPluginSystem">
+ * Why Nutch has a Plugin System?</a>
+ * @see <a href="https://cwiki.apache.org/confluence/display/NUTCH/WhichTechnicalConceptsAreBehindTheNutchPluginSystem">
+ * Which technical concepts are behind the nutch plugin system?</a>
+ * @see <a href="https://cwiki.apache.org/confluence/display/NUTCH/WhatsTheProblemWithPluginsAndClass-loading">
+ * What's the problem with Plugins and Class loading?</a>
+ * @see <a href="https://cwiki.apache.org/confluence/display/NUTCH/WritingPluginExample">
+ * Writing Plugin Example</a>
+ */
+package org.apache.nutch.plugin;
diff --git a/src/java/org/apache/nutch/plugin/package.html b/src/java/org/apache/nutch/plugin/package.html
deleted file mode 100644
index 442ed09..0000000
--- a/src/java/org/apache/nutch/plugin/package.html
+++ /dev/null
@@ -1,40 +0,0 @@
-<html>
-<body>
-The Nutch {@link org.apache.nutch.plugin.Pluggable Plugin} System.
-<p>
-<b>The Nutch Plugin System provides a way to extend nutch functionality</b>.
-A large part of the functionality of Nutch are provided by plugins:
-All of the parsing, indexing and searching that nutch does is actually
-accomplished by various plugins.
-</p><p>
-In writing a plugin, you're actually providing one or more extensions of the
-existing extension-points (<i>hooks</i>).
-The core Nutch extension-points are themselves defined in a plugin,
-the <code>nutch-extensionpoints</code> plugin.
-Each extension-point defines an interface that must be implemented by the
-extension. The core extension-points and extensions available in Nutch are
-listed in the {@link org.apache.nutch.plugin.Pluggable} interface.
-</p>
-
-@see <a href="./doc-files/plugin.dtd">Nutch plugin manifest DTD</a>
-
-@see <a href="https://cwiki.apache.org/confluence/display/NUTCH/PluginCentral">
-     Plugin Central
-     </a>
-@see <a href="https://cwiki.apache.org/confluence/display/NUTCH/AboutPlugins">
-     About Plugins
-     </a>
-@see <a href="https://cwiki.apache.org/confluence/display/NUTCH/WhyNutchHasAPluginSystem">
-     Why Nutch has a Plugin System?
-     </a>
-@see <a href="https://cwiki.apache.org/confluence/display/NUTCH/WhichTechnicalConceptsAreBehindTheNutchPluginSystem">
-     Which technical concepts are behind the nutch plugin system?
-     </a>
-@see <a href="https://cwiki.apache.org/confluence/display/NUTCH/WhatsTheProblemWithPluginsAndClass-loading">
-     What's the problem with Plugins and Class loading?
-     </a>
-@see <a href="https://cwiki.apache.org/confluence/display/NUTCH/WritingPluginExample">
-     Writing Plugin Example
-     </a>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/java/org/apache/nutch/util/domain/package-info.java
similarity index 60%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/java/org/apache/nutch/util/domain/package-info.java
index 4181951..6a799a9 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/java/org/apache/nutch/util/domain/package-info.java
@@ -14,8 +14,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ * Classes for domain name analysis. For more information refer to the
+ * following URLs:
+ * <ul>
+ *  <li><a href="http://en.wikipedia.org/wiki/DNS">http://en.wikipedia.org/wiki/DNS</a></li>
+ *  <li><a href="http://en.wikipedia.org/wiki/Top-level_domain">http://en.wikipedia.org/wiki/Top-level_domain</a></li>
+ *  <li><a href="http://wiki.mozilla.org/TLD_List">http://wiki.mozilla.org/TLD_List</a></li>
+ *  <li><a href="http://publicsuffix.org/">http://publicsuffix.org/</a></li>
+ * </ul>
+ */
+package org.apache.nutch.util.domain;
diff --git a/src/java/org/apache/nutch/util/domain/package.html b/src/java/org/apache/nutch/util/domain/package.html
deleted file mode 100644
index 49e0e6a..0000000
--- a/src/java/org/apache/nutch/util/domain/package.html
+++ /dev/null
@@ -1,14 +0,0 @@
-<html>
-<body>
-<h2>Classes for domain name analysis.</h2>
-
-for information please refer to following urls : 
-<ul>
-<li><a href="http://en.wikipedia.org/wiki/DNS">http://en.wikipedia.org/wiki/DNS</a></li>
-<li><a href="http://en.wikipedia.org/wiki/Top-level_domain">http://en.wikipedia.org/wiki/Top-level_domain</a></li>
-<li><a href="http://wiki.mozilla.org/TLD_List">http://wiki.mozilla.org/TLD_List</a></li>
-<li><a href="http://publicsuffix.org/">http://publicsuffix.org/</a></li>
-</ul>
-
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/creativecommons/src/java/org/creativecommons/nutch/package-info.java
similarity index 87%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/creativecommons/src/java/org/creativecommons/nutch/package-info.java
index 4181951..138637c 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/creativecommons/src/java/org/creativecommons/nutch/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Sample plugins that parse and index Creative Commons metadata. */
+package org.creativecommons.nutch;
diff --git a/src/plugin/creativecommons/src/java/org/creativecommons/nutch/package.html b/src/plugin/creativecommons/src/java/org/creativecommons/nutch/package.html
deleted file mode 100644
index 0c91293..0000000
--- a/src/plugin/creativecommons/src/java/org/creativecommons/nutch/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>Sample plugins that parse and index Creative Commons medadata.</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/package-info.java
similarity index 87%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/package-info.java
index 4181951..2f3ee87 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** An indexing plugin for inbound anchor text. */
+package org.apache.nutch.indexer.anchor;
diff --git a/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/package.html b/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/package.html
deleted file mode 100644
index c255029..0000000
--- a/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>An indexing plugin for inbound anchor text.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package-info.java
similarity index 86%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package-info.java
index 4181951..74bba1e 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package-info.java
@@ -14,8 +14,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/**
+ * A basic indexing plugin, adds basic fields: url, host, title, content, etc.
+ */
+package org.apache.nutch.indexer.basic;
diff --git a/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html b/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html
deleted file mode 100644
index 3fae405..0000000
--- a/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>A basic indexing plugin, adds basic fields: url, host, title, content, etc.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package-info.java
similarity index 84%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package-info.java
index 4181951..bd20502 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package-info.java
@@ -14,8 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ * A more indexing plugin, adds "more" index fields: last modified 
+ * date, MIME type, content length.
+ */
+package org.apache.nutch.indexer.more;
diff --git a/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html b/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html
deleted file mode 100644
index 7b8fade..0000000
--- a/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html
+++ /dev/null
@@ -1,6 +0,0 @@
-<html>
-<body>
-<p>A more indexing plugin, adds "more" index fields:
-last modified date, MIME type, content length.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package-info.java
similarity index 72%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package-info.java
index 4181951..8c6eb3d 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package-info.java
@@ -14,8 +14,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** A simple plugin called at indexing that adds fields with static data. 
+ * You can specify a list of fieldname:fieldcontent per nutch job. 
+ * It can be useful when collections can't be created by urlpatterns, 
+ * like in subcollection, but on a job-basis.
+ */
+package org.apache.nutch.indexer.staticfield;
diff --git a/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html b/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html
deleted file mode 100644
index f4b5146..0000000
--- a/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>A simple plugin called at indexing that adds fields with static data. You can specify a list of fieldname:fieldcontent per nutch job. It can be useful when collections can't be created by urlpatterns, like in subcollection, but on a job-basis.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/package-info.java
similarity index 72%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/package-info.java
index 4181951..b14730b 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/package-info.java
@@ -14,8 +14,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ * <p>Text document language identifier.</p>
+ * <p>Language profiles are based on material from
+ * <a href="http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps">
+ * http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps</a>.</p>
+ */
+package org.apache.nutch.analysis.lang;
diff --git a/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/package.html b/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/package.html
deleted file mode 100644
index 06343c8..0000000
--- a/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/package.html
+++ /dev/null
@@ -1,6 +0,0 @@
-<html>
-<body>
-<p>Text document language identifier.</p><p>Language profiles are based on material from
-<a href="http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps/">http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps/</a>.</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package-info.java
similarity index 80%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package-info.java
index 4181951..a99b4ba 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package-info.java
@@ -14,8 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ * Common API used by HTTP plugins ({@link org.apache.nutch.protocol.http http},
+ * {@link org.apache.nutch.protocol.httpclient httpclient})
+ */
+package org.apache.nutch.protocol.http.api;
diff --git a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html
deleted file mode 100644
index 972bb3c..0000000
--- a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/package.html
+++ /dev/null
@@ -1,6 +0,0 @@
-<html>
-<body>
-<p>Common API used by HTTP plugins ({@link org.apache.nutch.protocol.http http},
-{@link org.apache.nutch.protocol.httpclient httpclient})</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/package-info.java
similarity index 82%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/package-info.java
index 4181951..4a828bd 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/package-info.java
@@ -14,8 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ * A microformats <a href="http://www.microformats.org/wiki/Rel-Tag">Rel-Tag</a>
+ * Parser/Indexer/Querier plugin.
+ */
+package org.apache.nutch.microformats.reltag;
diff --git a/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/package.html b/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/package.html
deleted file mode 100644
index bef5409..0000000
--- a/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/package.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<html>
-<body>
-<p>
-A microformats <a href="http://www.microformats.org/wiki/Rel-Tag">Rel-Tag</a>
-Parser/Indexer/Querier plugin.
-</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/package-info.java
similarity index 81%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/parse-html/src/java/org/apache/nutch/parse/html/package-info.java
index 4181951..c1b3c46 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/package-info.java
@@ -14,8 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ * <p>An HTML document parsing plugin.</p>
+ * <p>This package relies on <a href="https://github.com/codelibs/nekohtml">NekoHTML</a>.</p>
+ */
+package org.apache.nutch.parse.html;
diff --git a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/package.html b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/package.html
deleted file mode 100644
index c650389..0000000
--- a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>An HTML document parsing plugin.</p><p>This package relies on <a href="http://www.apache.org/~andyc/neko/doc/html/index.html">NekoHTML</a>.</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package-info.java
similarity index 87%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package-info.java
index 4181951..0cc9b74 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Protocol plugin which supports retrieving local file resources. */
+package org.apache.nutch.protocol.file;
diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html
deleted file mode 100644
index 221c79c..0000000
--- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>Protocol plugin which supports retrieving local file resources.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/package-info.java
similarity index 86%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/package-info.java
index 4181951..d64c0fb 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Protocol plugin which supports retrieving documents via the ftp protocol. */
+package org.apache.nutch.protocol.ftp;
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/package.html b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/package.html
deleted file mode 100644
index d936930..0000000
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the ftp protocol.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package-info.java
similarity index 86%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package-info.java
index 4181951..bf4902c 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Protocol plugin which supports retrieving documents via the http protocol. */
+package org.apache.nutch.protocol.htmlunit;
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/package-info.java
similarity index 86%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/package-info.java
index 4181951..cc82483 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Protocol plugin which supports retrieving documents via the http protocol. */
+package org.apache.nutch.protocol.http;
diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/package.html b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/package.html
deleted file mode 100644
index 34d1d1c..0000000
--- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package-info.java
similarity index 67%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package-info.java
index 4181951..2512044 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package-info.java
@@ -14,8 +14,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ * Protocol plugin which supports retrieving documents via the 
+ * HTTP and HTTPS protocols, optionally with Basic, Digest and 
+ * NTLM authentication schemes for web server as well as 
+ * proxy server. It handles cookies within a single fetch 
+ * operation. This plugin is based on Jakarta Commons 
+ * HttpClient library.
+ */
+package org.apache.nutch.protocol.httpclient;
diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html
deleted file mode 100644
index 9cbcb14..0000000
--- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html
+++ /dev/null
@@ -1,9 +0,0 @@
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the HTTP and
-HTTPS protocols, optionally with Basic, Digest and NTLM authentication
-schemes for web server as well as proxy server. It handles cookies
-within a single fetch operation. This plugin is based on Jakarta
-Commons HttpClient library.</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/package-info.java
similarity index 86%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/package-info.java
index 4181951..f6738ed 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Protocol plugin which supports retrieving documents via selenium. */
+package org.apache.nutch.protocol.interactiveselenium;
diff --git a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/package.html b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/package.html
deleted file mode 100644
index 75cd5b5..0000000
--- a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via selenium.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/package-info.java
similarity index 87%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/package-info.java
index 4181951..2441aec 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Protocol plugin which supports retrieving documents via selenium. */
+package org.apache.nutch.protocol.selenium;
diff --git a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/package.html b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/package.html
deleted file mode 100644
index 75cd5b5..0000000
--- a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via selenium.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/package-info.java b/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/package-info.java
new file mode 100644
index 0000000..b3ddbd1
--- /dev/null
+++ b/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/package-info.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <p>Metadata Scoring Plugin</p>
+ * <p>Propagates Metadata from an injected or outlink url in the crawldb 
+ * to the url's different processed objects. In moving any metadata 
+ * item, you need to copy metadata in three steps:</p>
+ * <ul>
+ *   <li>Crawldb to content: Copy a metadata entry stored in the crawldb record of the url to the url's fetched content object. You need to specify the entry in the <b>scoring.db.md</b> property</li>
+ *   <li>Content to parsedData: Copy a metadata entry stored in the Content object of a crawled url to its parsedData.  You need to specify the entry in the <b>scoring.content.md</b> property</li>
+ *   <li>ParsedData to outlink objects: Copy a metadata entry stored in the parsedData of a crawl item to the crawldb records of the url's outlinks. You need to specify the entry in the <b>scoring.parse.md</b> property</li>
+ * </ul>
+ * <p>Note that you can not move data directly from a crawldb record to 
+ * parseData or outlink objects. The sequence of moving the metadata 
+ * should be crawldb -&gt; content -&gt; parsedData -&gt; outlink objects.</p>
+ */
+package org.apache.nutch.scoring.metadata;
diff --git a/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/package.html b/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/package.html
deleted file mode 100644
index 0356152..0000000
--- a/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/package.html
+++ /dev/null
@@ -1,33 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-  <body>
-    <p>
-      Metadata Scoring Plugin
-    </p>
-    <p>
-      Propagates Metadata from an injected or outlink url in the crawldb to the url's different procecssed objects. In moving any metadata item, you need to copy metadata in three steps:
-    <ul>
-      <li>Crawldb to content: Copy a metadata entry stored in the crawldb record of the url to the url's fetched content object. You need to specify the entry in the <b>scoring.db.md</b> property</li>
-      <li>Content to parsedData: Copy a metadata entry stored in the Content object of a crawled url to its parsedData.  You need to specify the entry in the <b>scoring.content.md</b> property</li>
-      <li>ParsedData to outlink objects: Copy a metadata entry stored in the parsedData of a crawl item to the crawldb records of the url's outlinks. You need to specify the entry in the <b>scoring.parse.md</b> property</li>
-    </ul>
-
-    Note that you can not move data directly from a crawldb record to parseData or outlink objects. The sequence of moving the metadata should be crawldb -> content -> parsedData -> outlink objects.
-    </p>
-  </body>
-</html>
diff --git a/src/plugin/subcollection/src/java/org/apache/nutch/collection/package-info.java b/src/plugin/subcollection/src/java/org/apache/nutch/collection/package-info.java
new file mode 100644
index 0000000..055cb45
--- /dev/null
+++ b/src/plugin/subcollection/src/java/org/apache/nutch/collection/package-info.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <p>Subcollection is a subset of an index. Subcollections are 
+ * defined by urlpatterns in form of white/blacklist. So to get the 
+ * page into subcollection it must match the whitelist and not 
+ * the blacklist.</p>
+ * <p> Subcollection definitions are read from a file 
+ * <code>subcollections.xml</code> and the format is as follows 
+ * (imagine here that you are crawling all the virtualhosts from 
+ * apache.org and you want to tag pages with url pattern 
+ * "https://nutch.apache.org" and 
+ * "https://cwiki.apache.org/confluence/display/nutch" to be part of 
+ * subcollection "nutch", this allows you to later search specifically 
+ * from this subcollection)</p>
+ * <pre>
+ * {@code
+ * <?xml version="1.0" encoding="UTF-8"?>
+ * <subcollections>
+ *  <subcollection>
+ *   <name>nutch</name>
+ *   <id>nutch</id>
+ *   <whitelist>https://nutch.apache.org</whitelist>
+ *   <whitelist>https://cwiki.apache.org/confluence/display/nutch</whitelist>
+ *   <blacklist />
+ *  </subcollection>
+ * </subcollections>
+ * }
+ * </pre>
+ * <p>Despite this configuration you can still crawl any urls 
+ * as long as they pass through your global url filters. (note that 
+ * you must also seed your urls in normal nutch way)</p>
+ */
+package org.apache.nutch.collection;
diff --git a/src/plugin/subcollection/src/java/org/apache/nutch/collection/package.html b/src/plugin/subcollection/src/java/org/apache/nutch/collection/package.html
deleted file mode 100644
index be08d1c..0000000
--- a/src/plugin/subcollection/src/java/org/apache/nutch/collection/package.html
+++ /dev/null
@@ -1,36 +0,0 @@
-<html>
-<body>
-<p>
-Subcollection is a subset of an index. Subcollections are defined
-by urlpatterns in form of white/blacklist. So to get the page into
-subcollection it must match the whitelist and not the blacklist.
-</p>
-<p>
-Subcollection definitions are read from a file subcollections.xml
-and the format is as follows (imagine here that you are crawling all
-the virtualhosts from apache.org and you wan't to tag pages with
-url pattern "http://lucene.apache.org/nutch" and http://wiki.apache.org/nutch/
-to be part of subcollection "nutch", this allows you to later search
-specifically from this subcollection)
-</p>
-<p/>
-<p/>
-<pre>
-&lt;?xml version="1.0" encoding="UTF-8"?>
-&lt;subcollections>
-	&lt;subcollection>
-		&lt;name>nutch&lt;/name>
-		&lt;id>lucene&lt;/id>
-		&lt;whitelist>http://lucene.apache.org/nutch&lt;/whitelist>
-		&lt;whitelist>http://wiki.apache.org/nutch/&lt;/whitelist>
-		&lt;blacklist />
-	&lt;/subcollection>
-&lt;/subcollections>
-</pre>
-</p>
-<p>Despite of this configuration you still can crawl any urls
-as long as they pass through your global url filters. (note that
-you must also seed your urls in normal nutch way)
-</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/package-info.java
similarity index 87%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/tld/src/java/org/apache/nutch/indexer/tld/package-info.java
index 4181951..6696fdc 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Top Level Domain Indexing plugin. */
+package org.apache.nutch.indexer.tld;
diff --git a/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/package.html b/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/package.html
deleted file mode 100644
index 75841d9..0000000
--- a/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>Top Level Domain Indexing plugin.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/package-info.java
similarity index 87%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/tld/src/java/org/apache/nutch/scoring/tld/package-info.java
index 4181951..6ab8373 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** Top Level Domain Scoring plugin. */
+package org.apache.nutch.scoring.tld;
diff --git a/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/package.html b/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/package.html
deleted file mode 100644
index d05e4b8..0000000
--- a/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>Top Level Domain Scoring plugin.</p><p></p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package-info.java
similarity index 79%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package-info.java
index 4181951..41743a3 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package-info.java
@@ -14,8 +14,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ * URL filter plugin based on 
+ * <a href="https://www.brics.dk/automaton/">dk.brics.automaton</a> Finite-State 
+ * Automata for Java<sup>TM</sup>.
+ */
+package org.apache.nutch.urlfilter.automaton;
diff --git a/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html b/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html
deleted file mode 100644
index 282013f..0000000
--- a/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html
+++ /dev/null
@@ -1,9 +0,0 @@
-<html>
-<body>
-<p>
-URL filter plugin based on
-<a href="https://www.brics.dk/automaton/">dk.brics.automaton</a> Finite-State
-Automata for Java<sup>TM</sup>.
-</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package-info.java
similarity index 85%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package-info.java
index 4181951..1718ee8 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package-info.java
@@ -14,8 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** URL filter plugin to include only URLs which match one of a given list of URL prefixes. */
+package org.apache.nutch.urlfilter.prefix;
diff --git a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html
deleted file mode 100644
index dbed0be..0000000
--- a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>URL filter plugin to include only URLs which match one of a given list of URL prefixes.</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package-info.java
similarity index 85%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package-info.java
index 4181951..8cb3afa 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package-info.java
@@ -14,8 +14,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/** 
+ * URL filter plugin to include and/or exclude URLs matching Java regular expressions.
+ */
+package org.apache.nutch.urlfilter.regex;
diff --git a/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html b/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html
deleted file mode 100644
index 7acf73b..0000000
--- a/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>URL filter plugin to include and/or exclude URLs matching Java regular expressions.</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package-info.java
similarity index 70%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package-info.java
index 4181951..11d2cde 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package-info.java
@@ -14,8 +14,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/**
+ * <p>URL filter plugin that validates given urls.</p>
+ * <p>This plugin runs a series of tests for the given url to make sure that given
+ * url is valid and 'fetchable'.</p>
+ * <p>Note: This plugin should <b>only</b> be used for web-related protocols such
+ * as http, https and ftp.</p>
+ */
+package org.apache.nutch.urlfilter.validator;
diff --git a/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html b/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html
deleted file mode 100644
index b5ec8a1..0000000
--- a/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html
+++ /dev/null
@@ -1,9 +0,0 @@
-<html>
-<body>
-<p>URL filter plugin that validates given urls.</p>
-<p>This plugin runs a series of tests for the given url to make sure that given
-url is valid and 'fetchable'.</p>
-<p>Note: This plugin should <b>only</b> be used for web-related protocols such
-as http, https and ftp.</p>
-</body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/urlmeta/src/java/org/apache/nutch/indexer/urlmeta/package-info.java
similarity index 62%
copy from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
copy to src/plugin/urlmeta/src/java/org/apache/nutch/indexer/urlmeta/package-info.java
index 4181951..1a1239b 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/urlmeta/src/java/org/apache/nutch/indexer/urlmeta/package-info.java
@@ -14,8 +14,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/**
+ * <p>URL Meta Tag Indexing Plugin</p>
+ * <p>Takes Meta Tags, injected alongside a URL 
+ * (see <a href="https://issues.apache.org/jira/browse/NUTCH-655">NUTCH-655</a>) 
+ * and specified in the "urlmeta.tags" property, and inserts them into 
+ * the document--which is then sent to the Indexer. If you specify 
+ * these fields in the Nutch schema (as well as the Indexer's), you 
+ * can reasonably assume that they will be indexed.</p>
+ */
+package org.apache.nutch.indexer.urlmeta;
diff --git a/src/plugin/urlmeta/src/java/org/apache/nutch/indexer/urlmeta/package.html b/src/plugin/urlmeta/src/java/org/apache/nutch/indexer/urlmeta/package.html
deleted file mode 100644
index 5da5d56..0000000
--- a/src/plugin/urlmeta/src/java/org/apache/nutch/indexer/urlmeta/package.html
+++ /dev/null
@@ -1,12 +0,0 @@
-<html>
-  <body>
-    <p>
-      URL Meta Tag Indexing Plugin
-    </p>
-    <p>
-      Takes Meta Tags, injected alongside a URL (see NUTCH-655) and specified in the "urlmeta.tags" property, 
-      and inserts them into the document--which is then sent to the Indexer.  If you specify these fields in
-      the Nutch's schema (as well as the Indexer's), you can reasonably assume that they will be indexed.
-    </p>
-  </body>
-</html>
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html b/src/plugin/urlmeta/src/java/org/apache/nutch/scoring/urlmeta/package-info.java
similarity index 69%
rename from src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
rename to src/plugin/urlmeta/src/java/org/apache/nutch/scoring/urlmeta/package-info.java
index 4181951..df182c4 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/package.html
+++ b/src/plugin/urlmeta/src/java/org/apache/nutch/scoring/urlmeta/package-info.java
@@ -14,8 +14,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-<html>
-<body>
-<p>Protocol plugin which supports retrieving documents via the http protocol.</p><p></p>
-</body>
-</html>
+
+/**
+ * <p>URL Meta Tag Scoring Plugin</p>
+ * <p>Propagates Meta Tags, injected alongside a URL 
+ * (see <a href="https://issues.apache.org/jira/browse/NUTCH-655">NUTCH-655</a>) 
+ * and specified in the "urlmeta.tags" property, along to their outlinks. 
+ * This does not actually perform scoring.</p>
+ */
+package org.apache.nutch.scoring.urlmeta;
+
diff --git a/src/plugin/urlmeta/src/java/org/apache/nutch/scoring/urlmeta/package.html b/src/plugin/urlmeta/src/java/org/apache/nutch/scoring/urlmeta/package.html
deleted file mode 100644
index 5bba7a8..0000000
--- a/src/plugin/urlmeta/src/java/org/apache/nutch/scoring/urlmeta/package.html
+++ /dev/null
@@ -1,11 +0,0 @@
-<html>
-  <body>
-    <p>
-      URL Meta Tag Scoring Plugin
-    </p>
-    <p>
-      Propagates Meta Tags, injected alongside a URL (see NUTCH-655) and specified in the "urlmeta.tags" property, 
-      along to their outlinks.  This does not actually perform scoring.
-    </p>
-  </body>
-</html>