You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2014/06/24 23:41:31 UTC

svn commit: r1605204 [2/3] - in /nutch: branches/2.x/ branches/2.x/src/java/org/apache/nutch/api/ branches/2.x/src/java/org/apache/nutch/api/impl/ branches/2.x/src/java/org/apache/nutch/crawl/ branches/2.x/src/java/org/apache/nutch/host/ branches/2.x/s...

Propchange: nutch/branches/2.x/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/branches/2.x/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html (original)
+++ nutch/branches/2.x/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html Tue Jun 24 21:41:28 2014
@@ -1,6 +1,6 @@
 <html>
 <body>
-<p>A url filter plugin that validates given urls.</p>
+<p>URL filter plugin that validates given urls.</p>
 <p>This plugin runs a series of tests for the given url to make sure that given
 url is valid and 'fetchable'.</p>
 <p>Note: This plugin should <b>only</b> be used for web-related protocols such

Modified: nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java (original)
+++ nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java Tue Jun 24 21:41:28 2014
@@ -31,7 +31,13 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.conf.Configured;
 import org.apache.oro.text.regex.*;
 
-/** Converts URLs to a normal form . */
+/**
+ * Converts URLs to a normal form:
+ * <ul>
+ * <li>remove dot segments in path: <code>/./</code> or <code>/../</code></li>
+ * <li>remove default ports, e.g. 80 for protocol <code>http://</code></li>
+ * </ul>
+ */
 public class BasicURLNormalizer extends Configured implements URLNormalizer {
     public static final Logger LOG = LoggerFactory.getLogger(BasicURLNormalizer.class);
 

Added: nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL normalizer performing basic normalizations: remove default ports
+ * and dot segments in path.
+ */
+package org.apache.nutch.net.urlnormalizer.basic;

Propchange: nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL normalizer dummy which does not change URLs. Required because at least
+ * one URL normalizer must be defined in any scope.
+ */
+package org.apache.nutch.net.urlnormalizer.pass;

Propchange: nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL normalizer with configurable rules based on regular expressions
+ * ({@link java.util.regex.Pattern}).
+ */
+package org.apache.nutch.net.urlnormalizer.regex;

Propchange: nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 24 21:41:28 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1787 update and complete API doc overview page (snagel)
+
 * NUTCH-1767 remove special treatment of "params" in relative links (snagel)
 
 * NUTCH-1718 redefine http.robots.agent as "additional agent names" (snagel, Tejas Patil, Daniel Kugel)

Modified: nutch/trunk/build.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Tue Jun 24 21:41:28 2014
@@ -172,6 +172,8 @@
       <packageset dir="${plugins.dir}/index-metadata/src/java"/>
       <packageset dir="${plugins.dir}/index-more/src/java"/>
       <packageset dir="${plugins.dir}/index-static/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-dummy/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-elastic/src/java/" />
       <packageset dir="${plugins.dir}/indexer-solr/src/java"/>
       <packageset dir="${plugins.dir}/language-identifier/src/java"/>
       <packageset dir="${plugins.dir}/lib-http/src/java"/>
@@ -184,11 +186,11 @@
       <packageset dir="${plugins.dir}/parse-swf/src/java"/>
       <packageset dir="${plugins.dir}/parse-tika/src/java"/>
       <packageset dir="${plugins.dir}/parse-zip/src/java"/>
-      <packageset dir="${plugins.dir}/lib-http/src/java"/>
       <packageset dir="${plugins.dir}/protocol-file/src/java"/>
       <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
       <packageset dir="${plugins.dir}/protocol-http/src/java"/>
       <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-depth/src/java"/>
       <packageset dir="${plugins.dir}/scoring-link/src/java"/>
       <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
       <packageset dir="${plugins.dir}/subcollection/src/java"/>
@@ -198,12 +200,13 @@
       <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
-      <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
       <packageset dir="${plugins.dir}/urlmeta/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
       
       <link href="${javadoc.link.java}"/>
@@ -221,8 +224,10 @@
       <group title="Plugins API" packages="${plugins.api}"/>
       <group title="Protocol Plugins" packages="${plugins.protocol}"/>
       <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
+      <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/>
       <group title="Scoring Plugins" packages="${plugins.scoring}"/>
       <group title="Parse Plugins" packages="${plugins.parse}"/>
+      <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/>
       <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
       <group title="Indexer Plugins" packages="${plugins.indexer}"/>
       <group title="Misc. Plugins" packages="${plugins.misc}"/>
@@ -572,6 +577,8 @@
       <packageset dir="${plugins.dir}/index-metadata/src/java"/>
       <packageset dir="${plugins.dir}/index-more/src/java"/>
       <packageset dir="${plugins.dir}/index-static/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-dummy/src/java"/>
+      <packageset dir="${plugins.dir}/indexer-elastic/src/java/" />
       <packageset dir="${plugins.dir}/indexer-solr/src/java"/>
       <packageset dir="${plugins.dir}/language-identifier/src/java"/>
       <packageset dir="${plugins.dir}/lib-http/src/java"/>
@@ -588,6 +595,7 @@
       <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
       <packageset dir="${plugins.dir}/protocol-http/src/java"/>
       <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-depth/src/java"/>
       <packageset dir="${plugins.dir}/scoring-link/src/java"/>
       <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
       <packageset dir="${plugins.dir}/subcollection/src/java"/>
@@ -601,7 +609,9 @@
       <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
       <packageset dir="${plugins.dir}/urlmeta/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/>
       <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
       
       <link href="${javadoc.link.java}"/>
@@ -619,8 +629,10 @@
       <group title="Plugins API" packages="${plugins.api}"/>
       <group title="Protocol Plugins" packages="${plugins.protocol}"/>
       <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
+      <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/>
       <group title="Scoring Plugins" packages="${plugins.scoring}"/>
       <group title="Parse Plugins" packages="${plugins.parse}"/>
+      <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/>
       <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
       <group title="Indexer Plugins" packages="${plugins.indexer}"/>
       <group title="Misc. Plugins" packages="${plugins.misc}"/>
@@ -944,9 +956,9 @@
         <source path="${basedir}/src/plugin/index-anchor/src/test/" />
         <source path="${basedir}/src/plugin/index-basic/src/java/" />
         <source path="${basedir}/src/plugin/index-basic/src/test/" />
+        <source path="${basedir}/src/plugin/indexer-dummy/src/java/" />
         <source path="${basedir}/src/plugin/indexer-solr/src/java/" />
         <source path="${basedir}/src/plugin/indexer-elastic/src/java/" />
-        <source path="${basedir}/src/plugin/indexer-dummy/src/java/" />
         <source path="${basedir}/src/plugin/index-metadata/src/java/" />
         <source path="${basedir}/src/plugin/index-more/src/java/" />
         <source path="${basedir}/src/plugin/index-more/src/test/" />

Modified: nutch/trunk/default.properties
URL: http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/default.properties (original)
+++ nutch/trunk/default.properties Tue Jun 24 21:41:28 2014
@@ -97,22 +97,25 @@ plugins.urlfilter=\
    org.apache.nutch.urlfilter.domain*:\
    org.apache.nutch.urlfilter.domainblacklist*:\
    org.apache.nutch.urlfilter.prefix*:\
-   org.apache.nutch.urlfilter.regex*\
+   org.apache.nutch.urlfilter.regex*:\
    org.apache.nutch.urlfilter.suffix*:\
    org.apache.nutch.urlfilter.validator*
 
 #
 # URL Normalizer Plugins
 #
-plugins.urlfilter=\
+plugins.urlnormalizer=\
    org.apache.nutch.net.urlnormalizer.basic*:\
+   org.apache.nutch.net.urlnormalizer.host*:\
    org.apache.nutch.net.urlnormalizer.pass*:\
+   org.apache.nutch.net.urlnormalizer.querystring*:\
    org.apache.nutch.net.urlnormalizer.regex*
 
 #
 # Scoring Plugins
 #
 plugins.scoring=\
+   org.apache.nutch.scoring.depth*:\
    org.apache.nutch.scoring.link*:\
    org.apache.nutch.scoring.opic*:\
    org.apache.nutch.scoring.tld*:\
@@ -133,8 +136,9 @@ plugins.parse=\
 #
 # Parse Filter Plugins
 #
-plugins.parse=\
-   org.apache.nutch.parse.headings*
+plugins.parsefilter=\
+   org.apache.nutch.parse.headings*:\
+   org.apache.nutch.parse.metatags*
 
 #
 # Indexing Filter Plugins
@@ -144,6 +148,7 @@ plugins.index=\
    org.apache.nutch.indexer.basic*:\
    org.apache.nutch.indexer.feed*:\
    org.apache.nutch.indexer.metadata*:\
+   org.apache.nutch.indexer.more*:\
    org.apache.nutch.indexer.static*:\
    org.apache.nutch.indexer.subcollection*:\
    org.apache.nutch.indexer.tld*:\
@@ -153,18 +158,20 @@ plugins.index=\
 # Indexing Backend Plugins
 #
 plugins.indexer=\
+   org.apache.nutch.indexwriter.dummy*:\
+   org.apache.nutch.indexwriter.elastic*:\
    org.apache.nutch.indexwriter.solr*
 
 #
 # Misc. Plugins
 #
 # (gathers plugins that cannot be dispatched
-# in any category, mainly because they contains
+# in any category, mainly because they contain
 # many extension points)
 #
 plugins.misc=\
    org.apache.nutch.collection*:\
    org.apache.nutch.analysis.lang*:\
-   org.creativecommons.nutch*
-   org.apache.nutch.microformats.reltag*:\
+   org.creativecommons.nutch*:\
+   org.apache.nutch.microformats.reltag*
    

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/package.html (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
 <html>
 <body>
-Crawl control code.
+Crawl control code and tools to run the crawler.
 </body>
 </html>

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/package.html (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,10 @@
 <html>
 <body>
-Maintain Lucene full-text indexes.
+Index content, configure and run indexing and cleaning jobs to 
+add, update, and delete documents from an index. Two tasks are
+delegated to plugins:
+<ul>
+<li>indexing filters fill index fields of each document</li>
+<li>index writer plugins send documents to index back-ends (Solr, etc.).</li></ul>
 </body>
 </html>

Added: nutch/trunk/src/java/org/apache/nutch/net/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/net/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/net/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Web-related interfaces: URL {@link org.apache.nutch.net.URLFilter filters}
+ * and {@link org.apache.nutch.net.URLNormalizer normalizers}.
+ */
+package org.apache.nutch.net;

Propchange: nutch/trunk/src/java/org/apache/nutch/net/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java Tue Jun 24 21:41:28 2014
@@ -25,7 +25,7 @@ import org.apache.nutch.metadata.Metadat
 
 
 /**
- * A response inteface.  Makes all protocols model HTTP.
+ * A response interface.  Makes all protocols model HTTP.
  */
 public interface Response extends HttpHeaders {
   

Added: nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Helper classes related to the {@link org.apache.nutch.protocol.Protocol Protocol}
+ * interface, see also {@link org.apache.nutch.protocol}.
+ */
+package org.apache.nutch.net.protocols;

Propchange: nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/java/org/apache/nutch/parse/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/parse/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The {@link org.apache.nutch.parse.Parse Parse} interface and related classes.
+ */
+package org.apache.nutch.parse;

Propchange: nutch/trunk/src/java/org/apache/nutch/parse/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Classes related to the {@link org.apache.nutch.protocol.Protocol Protocol} interface,
+ * see also {@link org.apache.nutch.net.protocols}.
+ */
+package org.apache.nutch.protocol;

Propchange: nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The {@link org.apache.nutch.scoring.ScoringFilter ScoringFilter} interface.
+ */
+package org.apache.nutch.scoring;

Propchange: nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring implementation based on link analysis
+ * ({@link org.apache.nutch.scoring.webgraph.LinkRank}),
+ * see {@link org.apache.nutch.scoring.webgraph.WebGraph}.
+ */
+package org.apache.nutch.scoring.webgraph;

Propchange: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/java/org/apache/nutch/segment/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/segment/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A segment stores all data from one generate/fetch/update cycle:
+ * fetch list, protocol status, raw content, parsed content, and extracted outgoing links.
+ */
+package org.apache.nutch.segment;

Propchange: nutch/trunk/src/java/org/apache/nutch/segment/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tools to read the
+ * <a href="http://archive.org/web/researcher/ArcFileFormat.php">Arc file format</a>.
+ */
+package org.apache.nutch.tools.arc;

Propchange: nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/java/org/apache/nutch/tools/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/tools/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Miscellaneous tools.
+ */
+package org.apache.nutch.tools;

Propchange: nutch/trunk/src/java/org/apache/nutch/tools/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Proxy to {@link org.apache.nutch.tools.Benchmark benchmark} the crawler.
+ */
+package org.apache.nutch.tools.proxy;

Propchange: nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/java/org/apache/nutch/util/domain/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/domain/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/domain/package.html (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/domain/package.html Tue Jun 24 21:41:28 2014
@@ -1,8 +1,6 @@
 <html>
 <body>
-<h2> org.apache.nutch.util.domain</h2>
-
-<p>This package contains classes for domain analysis.</p>
+<h2>Classes for domain name analysis.</h2>
 
 for information please refer to following urls : 
 <ul>

Added: nutch/trunk/src/java/org/apache/nutch/util/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/util/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Miscellaneous utility classes.
+ */
+package org.apache.nutch.util;

Propchange: nutch/trunk/src/java/org/apache/nutch/util/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/java/overview.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/overview.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/overview.html (original)
+++ nutch/trunk/src/java/overview.html Tue Jun 24 21:41:28 2014
@@ -3,7 +3,7 @@
    <title>Apache Nutch</title>
 </head>
 <body>
-<p>Apache Nutch is an open source web-search software project. </p>
+<p>Apache Nutch is a highly extensible and scalable open source web crawler software project.</p>
 <p>Nutch is a project of the Apache Software Foundation and is part of the larger Apache community of developers and users.</p>
 </body>
 </html>

Added: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java (added)
+++ nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Indexing filter to index meta data from RSS feeds.
+ */
+package org.apache.nutch.indexer.feed;

Propchange: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java (added)
+++ nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse RSS feeds.
+ */
+package org.apache.nutch.parse.feed;

Propchange: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java (added)
+++ nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse filter to extract headings (h1, h2, etc.) from DOM parse tree.
+ */
+package org.apache.nutch.parse.headings;

Propchange: nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html (original)
+++ nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
 <html>
 <body>
-<p>A basic indexing plugin.</p><p></p>
+<p>A basic indexing plugin, adds basic fields: url, host, title, content, etc.</p><p></p>
 </body>
 </html>

Added: nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java (added)
+++ nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Indexing filter to add document metadata to the index.
+ * Metadata may come from CrawlDb, parse or content metadata.
+ */
+package org.apache.nutch.indexer.metadata;

Propchange: nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html (original)
+++ nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,6 @@
 <html>
 <body>
-<p>A more indexing plugin.</p><p></p>
+<p>A more indexing plugin, adds "more" index fields:
+last modified date, MIME type, content length.</p><p></p>
 </body>
 </html>

Added: nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java (added)
+++ nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Index writer plugin for debugging, writes pairs of &lt;action, url&gt; to a
+ * text file, action is one of "add", "update", or "delete".
+ */
+package org.apache.nutch.indexwriter.dummy;

Propchange: nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java (added)
+++ nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Index writer plugin for <a href="http://www.elasticsearch.org/">Elasticsearch</a>.
+ */
+package org.apache.nutch.indexwriter.elastic;

Propchange: nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java (added)
+++ nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Index writer plugin for <a href="http://lucene.apache.org/solr/">Apache Solr</a>.
+ */
+package org.apache.nutch.indexwriter.solr;

Propchange: nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java (added)
+++ nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Generic {@link org.apache.nutch.net.URLFilter URL filter} library,
+ * abstracting away from regular expression implementations.
+ */
+package org.apache.nutch.urlfilter.api;
+

Propchange: nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse wrapper to run external command to do the parsing.
+ */
+package org.apache.nutch.parse.ext;

Propchange: nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parser and parse filter plugin to extract all (possible) links
+ * from JavaScript files and embedded JavaScript code snippets.
+ */
+package org.apache.nutch.parse.js;

Propchange: nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/plugin/parse-metatags/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-metatags/plugin.xml?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-metatags/plugin.xml (original)
+++ nutch/trunk/src/plugin/parse-metatags/plugin.xml Tue Jun 24 21:41:28 2014
@@ -15,7 +15,7 @@
               name="MetaTags Parser"
               point="org.apache.nutch.parse.HtmlParseFilter">
       <implementation id="MetaTagsParser"
-                      class="org.apache.nutch.parse.MetaTagsParser"/>
+                      class="org.apache.nutch.parse.metatags.MetaTagsParser"/>
    </extension>
 
 </plugin>

Added: nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java (added)
+++ nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.parse.metatags;
+
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.parse.HTMLMetaTags;
+import org.apache.nutch.parse.HtmlParseFilter;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseResult;
+import org.apache.nutch.protocol.Content;
+import org.w3c.dom.DocumentFragment;
+
+/**
+ * Parse filter that copies HTML meta tags (keywords, description, etc.) into
+ * the parse metadata under the prefix 'metatag.' so that they can be indexed
+ * with the index-metadata plugin. The property <code>metatags.names</code> is
+ * a ';'-separated list of the tag names to keep; '*' (the default) keeps all.
+ */
+public class MetaTagsParser implements HtmlParseFilter {
+
+  private static final Log LOG = LogFactory.getLog(MetaTagsParser.class
+      .getName());
+
+  private Configuration conf;
+
+  /** Wanted tag names, lower-cased with Locale.ROOT; "*" stands for all. */
+  private final Set<String> metatagset = new HashSet<String>();
+
+  /**
+   * Stores the configuration and rebuilds the set of wanted tag names from
+   * the property <code>metatags.names</code>.
+   */
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    // start from scratch so that repeated calls do not accumulate names
+    metatagset.clear();
+    for (String val : conf.get("metatags.names", "*").split(";")) {
+      String name = val.trim();
+      if (!name.isEmpty()) {
+        metatagset.add(name.toLowerCase(java.util.Locale.ROOT));
+      }
+    }
+  }
+
+  public Configuration getConf() {
+    return this.conf;
+  }
+
+  /**
+   * Adds a 'metatag.'-prefixed copy of every wanted meta tag to the parse
+   * metadata of the document and returns the given parse result.
+   */
+  public ParseResult filter(Content content, ParseResult parseResult,
+      HTMLMetaTags metaTags, DocumentFragment doc) {
+    Parse parse = parseResult.get(content.getUrl());
+    Metadata metadata = parse.getData().getParseMeta();
+
+    // check in the metadata first : the tika-parser
+    // might have stored the values there already
+    for (String mdName : metadata.names()) {
+      addMetatags(metadata, mdName, metadata.getValues(mdName));
+    }
+
+    Metadata generalMetaTags = metaTags.getGeneralTags();
+    for (String tagName : generalMetaTags.names()) {
+      addMetatags(metadata, tagName, generalMetaTags.getValues(tagName));
+    }
+
+    Properties httpequiv = metaTags.getHttpEquivTags();
+    for (Enumeration<?> tagNames = httpequiv.propertyNames(); tagNames
+        .hasMoreElements();) {
+      String name = (String) tagNames.nextElement();
+      addMetatags(metadata, name, httpequiv.getProperty(name));
+    }
+
+    return parseResult;
+  }
+
+  /** Adds the values under 'metatag.' + name if the tag name is wanted. */
+  private void addMetatags(Metadata metadata, String name, String... values) {
+    String lcName = name.toLowerCase(java.util.Locale.ROOT);
+    if (metatagset.contains("*") || metatagset.contains(lcName)) {
+      for (String value : values) {
+        LOG.debug("Found meta tag : " + name + "\t" + value);
+        metadata.add("metatag." + lcName, value);
+      }
+    }
+  }
+
+}

Propchange: nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse filter to extract meta tags: keywords, description, etc.
+ * Used in combination with the index-metadata plugin
+ * (see {@link org.apache.nutch.indexer.metadata}).
+ */
+package org.apache.nutch.parse.metatags;

Propchange: nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetatagParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetatagParser.java?rev=1605204&r1=1605150&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetatagParser.java (original)
+++ nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetatagParser.java Tue Jun 24 21:41:28 2014
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.nutch.parse.html;
+package org.apache.nutch.parse.metatags;
 
 import java.util.Set;
 import java.util.TreeSet;

Added: nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse Flash SWF files.
+ */
+package org.apache.nutch.parse.swf;

Propchange: nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse various document formats with the help of
+ * <a href="http://tika.apache.org/">Apache Tika</a>.
+ */
+package org.apache.nutch.parse.tika;

Propchange: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse ZIP files: embedded files are recursively passed to appropriate parsers.
+ */
+package org.apache.nutch.parse.zip;

Propchange: nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java (original)
+++ nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java Tue Jun 24 21:41:28 2014
@@ -22,7 +22,7 @@ import org.apache.nutch.scoring.ScoringF
 
 /**
  * This scoring filter limits the number of hops from the initial seed urls. If
- * the numbe of hops exceeds the depth (either the default value, or the one
+ * the number of hops exceeds the depth (either the default value, or the one
  * set in the injector file) then all outlinks from that url are discarded,
  * effectively stopping further crawling along this path.
  */

Added: nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java (added)
+++ nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring filter to stop crawling at a configurable depth
+ * (number of "hops" from seed URLs).
+ */
+package org.apache.nutch.scoring.depth;

Propchange: nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java (added)
+++ nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring filter used in conjunction with
+ * {@link org.apache.nutch.scoring.webgraph.WebGraph}.
+ */
+package org.apache.nutch.scoring.link;

Propchange: nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java (added)
+++ nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring filter implementing a variant of the Online Page Importance Computation
+ * (OPIC) algorithm.
+ */
+package org.apache.nutch.scoring.opic;

Propchange: nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java (added)
+++ nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Indexing filter to assign documents to subcollections.
+ * The field "subcollection" is added and filled with a collection name
+ * defined in a configuration file and selected by pattern, see
+ * {@link org.apache.nutch.collection}.
+ */
+package org.apache.nutch.indexer.subcollection;

Propchange: nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html (original)
+++ nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html Tue Jun 24 21:41:28 2014
@@ -1,7 +1,7 @@
 <html>
 <body>
 <p>
-A url filter plugin based on
+URL filter plugin based on
 <a href="http://www.brics.dk/automaton/">dk.brics.automaton</a> Finite-State
 Automata for Java<sup>TM</sup>.
 </p>

Added: nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java (added)
+++ nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL filter plugin to include only URLs which match an element in a given list of
+ * domain suffixes, domain names, and/or host names.
+ * See {@link org.apache.nutch.urlfilter.domainblacklist} for the counterpart
+ * (exclude URLs by host or domain).
+ */
+package org.apache.nutch.urlfilter.domain;
+

Propchange: nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java (added)
+++ nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL filter plugin to exclude URLs by domain suffixes, domain names, and/or host names.
+ * See {@link org.apache.nutch.urlfilter.domain} for the counterpart (include only URLs
+ * matching host or domain).
+ */
+package org.apache.nutch.urlfilter.domainblacklist;
+

Propchange: nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html (original)
+++ nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
 <html>
 <body>
-<p>A url filter plugin.</p><p></p>
+<p>URL filter plugin to include only URLs which match one of a given list of URL prefixes.</p>
 </body>
 </html>

Modified: nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html (original)
+++ nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
 <html>
 <body>
-<p>A url filter plugin.</p><p></p>
+<p>URL filter plugin to include and/or exclude URLs matching Java regular expressions.</p>
 </body>
 </html>

Added: nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java (added)
+++ nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL filter plugin to either exclude or include only URLs which match
+ * one of the given (path) suffixes.
+ */
+package org.apache.nutch.urlfilter.suffix;
+

Propchange: nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html (original)
+++ nutch/trunk/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html Tue Jun 24 21:41:28 2014
@@ -1,6 +1,6 @@
 <html>
 <body>
-<p>A url filter plugin that validates given urls.</p>
+<p>URL filter plugin that validates given urls.</p>
 <p>This plugin runs a series of tests for the given url to make sure that given
 url is valid and 'fetchable'.</p>
 <p>Note: This plugin should <b>only</b> be used for web-related protocols such