You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2014/06/24 23:41:31 UTC
svn commit: r1605204 [1/3] - in /nutch: branches/2.x/
branches/2.x/src/java/org/apache/nutch/api/
branches/2.x/src/java/org/apache/nutch/api/impl/
branches/2.x/src/java/org/apache/nutch/crawl/
branches/2.x/src/java/org/apache/nutch/host/ branches/2.x/s...
Author: snagel
Date: Tue Jun 24 21:41:28 2014
New Revision: 1605204
URL: http://svn.apache.org/r1605204
Log:
NUTCH-1787 update and complete API doc overview page
Added:
nutch/branches/2.x/src/java/org/apache/nutch/api/impl/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/api/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/host/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/net/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/net/protocols/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/parse/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/protocol/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/scoring/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/storage/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/tools/arc/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/tools/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/tools/proxy/package-info.java (with props)
nutch/branches/2.x/src/java/org/apache/nutch/util/package-info.java (with props)
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java (with props)
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java (with props)
nutch/branches/2.x/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java (with props)
nutch/branches/2.x/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java (with props)
nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java (with props)
nutch/branches/2.x/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java (with props)
nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java (with props)
nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java (with props)
nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/
nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java (with props)
nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java (with props)
nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/
nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetaTagsParser.java
- copied, changed from r1605193, nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java
nutch/branches/2.x/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java (with props)
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java (with props)
nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java (with props)
nutch/branches/2.x/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java (with props)
nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java (with props)
nutch/branches/2.x/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java (with props)
nutch/branches/2.x/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java (with props)
nutch/branches/2.x/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java (with props)
nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java (with props)
nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java (with props)
nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/net/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/parse/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/segment/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/tools/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java (with props)
nutch/trunk/src/java/org/apache/nutch/util/package-info.java (with props)
nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java (with props)
nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java (with props)
nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java (with props)
nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java (with props)
nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java (with props)
nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java (with props)
nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java (with props)
nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java (with props)
nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java (with props)
nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java (with props)
nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/
nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java (with props)
nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java (with props)
nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/
- copied from r1605150, nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/html/
nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java (with props)
nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java (with props)
nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java (with props)
nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java (with props)
nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java (with props)
nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java (with props)
nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java (with props)
nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java (with props)
nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java (with props)
nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java (with props)
nutch/trunk/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java (with props)
nutch/trunk/src/plugin/urlnormalizer-host/src/java/org/apache/nutch/net/urlnormalizer/host/package-info.java (with props)
nutch/trunk/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java (with props)
nutch/trunk/src/plugin/urlnormalizer-querystring/src/java/org/apache/nutch/net/urlnormalizer/querystring/package-info.java (with props)
nutch/trunk/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java (with props)
Removed:
nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package.html
nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/MetaTagsParser.java
nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java
nutch/branches/2.x/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package.html
nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/package.html
nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/MetaTagsParser.java
nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/html/
nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package.html
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/build.xml
nutch/branches/2.x/default.properties
nutch/branches/2.x/src/java/org/apache/nutch/crawl/package.html
nutch/branches/2.x/src/java/org/apache/nutch/indexer/package.html
nutch/branches/2.x/src/java/org/apache/nutch/util/domain/package.html
nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html
nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html
nutch/branches/2.x/src/plugin/parse-metatags/plugin.xml
nutch/branches/2.x/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html
nutch/branches/2.x/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html
nutch/branches/2.x/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html
nutch/branches/2.x/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html
nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
nutch/trunk/CHANGES.txt
nutch/trunk/build.xml
nutch/trunk/default.properties
nutch/trunk/src/java/org/apache/nutch/crawl/package.html
nutch/trunk/src/java/org/apache/nutch/indexer/package.html
nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java
nutch/trunk/src/java/org/apache/nutch/util/domain/package.html
nutch/trunk/src/java/overview.html
nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html
nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html
nutch/trunk/src/plugin/parse-metatags/plugin.xml
nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetatagParser.java
nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java
nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html
nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html
nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html
nutch/trunk/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html
nutch/trunk/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Tue Jun 24 21:41:28 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1787 update and complete API doc overview page (snagel)
+
* NUTCH-1767 remove special treatment of "params" in relative links (snagel)
* NUTCH-1718 redefine http.robots.agent as "additional agent names" (snagel, Tejas Patil, Daniel Kugel)
Modified: nutch/branches/2.x/build.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/build.xml?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/build.xml (original)
+++ nutch/branches/2.x/build.xml Tue Jun 24 21:41:28 2014
@@ -161,24 +161,24 @@
<packageset dir="${src.dir}"/>
<packageset dir="${plugins.dir}/creativecommons/src/java"/>
- <packageset dir="${plugins.dir}/feed/src/java"/>
- <packageset dir="${plugins.dir}/index-metadata/src/java"/>
+ <!--packageset dir="${plugins.dir}/feed/src/java"/-->
<packageset dir="${plugins.dir}/index-anchor/src/java"/>
<packageset dir="${plugins.dir}/index-basic/src/java"/>
- <packageset dir="${plugins.dir}/index-more/src/java"/>
<packageset dir="${plugins.dir}/index-metadata/src/java"/>
+ <packageset dir="${plugins.dir}/index-more/src/java"/>
+ <packageset dir="${plugins.dir}/indexer-elastic/src/java"/>
<packageset dir="${plugins.dir}/indexer-solr/src/java"/>
<packageset dir="${plugins.dir}/language-identifier/src/java"/>
<packageset dir="${plugins.dir}/lib-http/src/java"/>
<packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
<packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
- <packageset dir="${plugins.dir}/parse-ext/src/java"/>
+ <!--packageset dir="${plugins.dir}/parse-ext/src/java"/-->
<packageset dir="${plugins.dir}/parse-html/src/java"/>
<packageset dir="${plugins.dir}/parse-metatags/src/java"/>
<packageset dir="${plugins.dir}/parse-js/src/java"/>
- <packageset dir="${plugins.dir}/parse-swf/src/java"/>
+ <!--packageset dir="${plugins.dir}/parse-swf/src/java"/-->
<packageset dir="${plugins.dir}/parse-tika/src/java"/>
- <packageset dir="${plugins.dir}/parse-zip/src/java"/>
+ <!--packageset dir="${plugins.dir}/parse-zip/src/java"/-->
<packageset dir="${plugins.dir}/protocol-file/src/java"/>
<packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
<packageset dir="${plugins.dir}/protocol-http/src/java"/>
@@ -213,8 +213,10 @@
<group title="Plugins API" packages="${plugins.api}" />
<group title="Protocol Plugins" packages="${plugins.protocol}" />
<group title="URL Filter Plugins" packages="${plugins.urlfilter}" />
+ <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/>
<group title="Scoring Plugins" packages="${plugins.scoring}" />
<group title="Parse Plugins" packages="${plugins.parse}" />
+ <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/>
<group title="Indexing Filter Plugins" packages="${plugins.index}" />
<group title="Indexer Plugins" packages="${plugins.indexer}"/>
<group title="Misc. Plugins" packages="${plugins.misc}" />
@@ -588,22 +590,24 @@
<packageset dir="${src.dir}"/>
<packageset dir="${plugins.dir}/creativecommons/src/java"/>
- <packageset dir="${plugins.dir}/feed/src/java"/>
+ <!--packageset dir="${plugins.dir}/feed/src/java"/-->
<packageset dir="${plugins.dir}/index-anchor/src/java"/>
<packageset dir="${plugins.dir}/index-basic/src/java"/>
+ <packageset dir="${plugins.dir}/index-metadata/src/java"/>
<packageset dir="${plugins.dir}/index-more/src/java"/>
+ <packageset dir="${plugins.dir}/indexer-elastic/src/java"/>
<packageset dir="${plugins.dir}/indexer-solr/src/java"/>
<packageset dir="${plugins.dir}/language-identifier/src/java"/>
<packageset dir="${plugins.dir}/lib-http/src/java"/>
<packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
<packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
- <packageset dir="${plugins.dir}/parse-ext/src/java"/>
+ <!--packageset dir="${plugins.dir}/parse-ext/src/java"/-->
<packageset dir="${plugins.dir}/parse-html/src/java"/>
<packageset dir="${plugins.dir}/parse-js/src/java"/>
<packageset dir="${plugins.dir}/parse-metatags/src/java"/>
- <packageset dir="${plugins.dir}/parse-swf/src/java"/>
+ <!--packageset dir="${plugins.dir}/parse-swf/src/java"/-->
<packageset dir="${plugins.dir}/parse-tika/src/java"/>
- <packageset dir="${plugins.dir}/parse-zip/src/java"/>
+ <!--packageset dir="${plugins.dir}/parse-zip/src/java"/-->
<packageset dir="${plugins.dir}/protocol-file/src/java"/>
<packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
<packageset dir="${plugins.dir}/protocol-http/src/java"/>
@@ -638,8 +642,10 @@
<group title="Plugins API" packages="${plugins.api}" />
<group title="Protocol Plugins" packages="${plugins.protocol}" />
<group title="URL Filter Plugins" packages="${plugins.urlfilter}" />
+ <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/>
<group title="Scoring Plugins" packages="${plugins.scoring}" />
<group title="Parse Plugins" packages="${plugins.parse}" />
+ <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/>
<group title="Indexing Filter Plugins" packages="${plugins.index}" />
<group title="Indexer Plugins" packages="${plugins.indexer}"/>
<group title="Misc. Plugins" packages="${plugins.misc}" />
@@ -952,9 +958,9 @@
<source path="${basedir}/src/plugin/index-anchor/src/test/" />
<source path="${basedir}/src/plugin/index-basic/src/java/" />
<source path="${basedir}/src/plugin/index-basic/src/test/" />
+ <source path="${basedir}/src/plugin/index-metadata/src/java/" />
<source path="${basedir}/src/plugin/index-more/src/java/" />
<source path="${basedir}/src/plugin/index-more/src/test/" />
- <source path="${basedir}/src/plugin/index-metadata/src/java/" />
<source path="${basedir}/src/plugin/language-identifier/src/java/" />
<source path="${basedir}/src/plugin/language-identifier/src/test/" />
<source path="${basedir}/src/plugin/lib-http/src/java/" />
@@ -989,20 +995,23 @@
<source path="${basedir}/src/plugin/protocol-sftp/src/java/" />
<source path="${basedir}/src/plugin/scoring-link/src/java/" />
<source path="${basedir}/src/plugin/scoring-opic/src/java/" />
+ <source path="${basedir}/src/plugin/scoring-opic/src/test/" />
<source path="${basedir}/src/plugin/subcollection/src/java/" />
<source path="${basedir}/src/plugin/subcollection/src/test/" />
<source path="${basedir}/src/plugin/tld/src/java/" />
+ <source path="${basedir}/src/plugin/tld/src/test/" />
<source path="${basedir}/src/plugin/urlfilter-automaton/src/java/" />
<source path="${basedir}/src/plugin/urlfilter-automaton/src/test/" />
<source path="${basedir}/src/plugin/urlfilter-domain/src/java/" />
<source path="${basedir}/src/plugin/urlfilter-domain/src/test/" />
<source path="${basedir}/src/plugin/urlfilter-prefix/src/java/" />
+ <source path="${basedir}/src/plugin/urlfilter-prefix/src/test/" />
<source path="${basedir}/src/plugin/urlfilter-regex/src/java/" />
<source path="${basedir}/src/plugin/urlfilter-regex/src/test/" />
<source path="${basedir}/src/plugin/urlfilter-suffix/src/java/" />
<source path="${basedir}/src/plugin/urlfilter-suffix/src/test/" />
<source path="${basedir}/src/plugin/urlfilter-validator/src/java/" />
- <source path="${basedir}/src/plugin/urlfilter-validator/src/test/" />
+ <source path="${basedir}/src/plugin/urlfilter-validator/src/test/" />
<source path="${basedir}/src/plugin/urlnormalizer-basic/src/java/" />
<source path="${basedir}/src/plugin/urlnormalizer-basic/src/test/" />
<source path="${basedir}/src/plugin/urlnormalizer-pass/src/java/" />
Modified: nutch/branches/2.x/default.properties
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/default.properties?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/default.properties (original)
+++ nutch/branches/2.x/default.properties Tue Jun 24 21:41:28 2014
@@ -89,7 +89,7 @@ plugins.protocol=\
org.apache.nutch.protocol.file*:\
org.apache.nutch.protocol.ftp*:\
org.apache.nutch.protocol.http*:\
- org.apache.nutch.protocol.httpclient*\
+ org.apache.nutch.protocol.httpclient*:\
org.apache.nutch.protocol.sftp*
#
@@ -99,22 +99,25 @@ plugins.urlfilter=\
org.apache.nutch.urlfilter.automaton*:\
org.apache.nutch.urlfilter.domain*:\
org.apache.nutch.urlfilter.prefix*:\
- org.apache.nutch.urlfilter.regex*\
+ org.apache.nutch.urlfilter.regex*:\
org.apache.nutch.urlfilter.suffix*:\
org.apache.nutch.urlfilter.validator*
#
# URL Normalizer Plugins
#
-plugins.urlfilter=\
+plugins.urlnormalizer=\
org.apache.nutch.net.urlnormalizer.basic*:\
+ org.apache.nutch.net.urlnormalizer.host*:\
org.apache.nutch.net.urlnormalizer.pass*:\
+ org.apache.nutch.net.urlnormalizer.querystring*:\
org.apache.nutch.net.urlnormalizer.regex*
#
# Scoring Plugins
#
plugins.scoring=\
+ org.apache.nutch.scoring.depth*:\
org.apache.nutch.scoring.link*:\
org.apache.nutch.scoring.opic*:\
org.apache.nutch.scoring.tld*
@@ -123,21 +126,28 @@ plugins.scoring=\
# Parse Plugins
#
plugins.parse=\
- org.apache.nutch.parse.ext*:\
- org.apache.nutch.parse.feed*:\
org.apache.nutch.parse.html*:\
org.apache.nutch.parse.js:\
- org.apache.nutch.parse.swf*:\
- org.apache.nutch.parse.tika:\
- org.apache.nutch.parse.zip
+ org.apache.nutch.parse.tika
+# org.apache.nutch.parse.ext*:\
+# org.apache.nutch.parse.feed*:\
+# org.apache.nutch.parse.swf*:\
+# org.apache.nutch.parse.zip
#
+# Parse Filter Plugins
+#
+plugins.parsefilter=\
+ org.apache.nutch.parse.metatags*
+
+#
# Indexing Filter Plugins
#
plugins.index=\
org.apache.nutch.indexer.anchor*:\
org.apache.nutch.indexer.basic*:\
org.apache.nutch.indexer.feed*:\
+ org.apache.nutch.indexer.metadata*:\
org.apache.nutch.indexer.more*:\
org.apache.nutch.indexer.subcollection*:\
org.apache.nutch.indexer.tld*
@@ -145,13 +155,14 @@ plugins.index=\
# Indexing Backend Plugins
#
plugins.indexer=\
+ org.apache.nutch.indexwriter.elastic*:\
org.apache.nutch.indexwriter.solr*
#
# Misc. Plugins
#
# (gathers plugins that cannot be dispatched
-# in any category, mainly because they contains
+# in any category, mainly because they contain
# many extension points)
#
plugins.misc=\
Added: nutch/branches/2.x/src/java/org/apache/nutch/api/impl/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/impl/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/impl/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/impl/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Implementations of REST API interfaces.
+ */
+package org.apache.nutch.api.impl;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/api/impl/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/java/org/apache/nutch/api/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * REST API to run and control crawl jobs.
+ */
+package org.apache.nutch.api;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/api/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/package.html (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
<html>
<body>
-Crawl control code.
+Crawl control code and tools to run the crawler.
</body>
</html>
Added: nutch/branches/2.x/src/java/org/apache/nutch/host/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/host/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/host/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/host/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Host database to store metadata per host.
+ */
+package org.apache.nutch.host;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/host/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/package.html (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,10 @@
<html>
<body>
-Maintain Lucene full-text indexes.
+Index content, configure and run indexing and cleaning jobs to
+add, update, and delete documents from an index. Two tasks are
+delegated to plugins:
+<ul>
+<li>indexing filters fill index fields of each document</li>
+<li>index writer plugins send documents to index back-ends (Solr, etc.).</li></ul>
</body>
</html>
Added: nutch/branches/2.x/src/java/org/apache/nutch/net/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/net/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/net/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/net/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Web-related interfaces: URL {@link org.apache.nutch.net.URLFilter filters}
+ * and {@link org.apache.nutch.net.URLNormalizer normalizers}.
+ */
+package org.apache.nutch.net;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/net/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/java/org/apache/nutch/net/protocols/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/net/protocols/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/net/protocols/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/net/protocols/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Helper classes related to the {@link org.apache.nutch.protocol.Protocol Protocol}
+ * interface, see also {@link org.apache.nutch.protocol}.
+ */
+package org.apache.nutch.net.protocols;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/net/protocols/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/java/org/apache/nutch/parse/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/parse/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/parse/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/parse/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The {@link org.apache.nutch.parse.Parse Parse} interface and related classes.
+ */
+package org.apache.nutch.parse;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/parse/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/java/org/apache/nutch/protocol/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/protocol/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/protocol/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/protocol/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Classes related to the {@link org.apache.nutch.protocol.Protocol Protocol} interface,
+ * see also {@link org.apache.nutch.net.protocols}.
+ */
+package org.apache.nutch.protocol;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/protocol/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/java/org/apache/nutch/scoring/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/scoring/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/scoring/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/scoring/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The {@link org.apache.nutch.scoring.ScoringFilter ScoringFilter} interface.
+ */
+package org.apache.nutch.scoring;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/scoring/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/java/org/apache/nutch/storage/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/storage/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/storage/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/storage/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Representation ({@link org.apache.nutch.storage.WebPage web pages},
+ * {@link org.apache.nutch.storage.Host host metadata}) of data in abstracted storage.
+ */
+package org.apache.nutch.storage;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/storage/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/java/org/apache/nutch/tools/arc/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/tools/arc/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/tools/arc/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/tools/arc/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tools to read the
+ * <a href="http://archive.org/web/researcher/ArcFileFormat.php">Arc file format</a>.
+ */
+package org.apache.nutch.tools.arc;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/tools/arc/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/java/org/apache/nutch/tools/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/tools/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/tools/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/tools/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Miscellaneous tools.
+ */
+package org.apache.nutch.tools;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/tools/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/java/org/apache/nutch/tools/proxy/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/tools/proxy/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/tools/proxy/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/tools/proxy/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Proxy to {@link org.apache.nutch.tools.Benchmark benchmark} the crawler.
+ */
+package org.apache.nutch.tools.proxy;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/tools/proxy/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/domain/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/domain/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/domain/package.html (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/domain/package.html Tue Jun 24 21:41:28 2014
@@ -1,8 +1,6 @@
<html>
<body>
-<h2> org.apache.nutch.util.domain</h2>
-
-<p>This package contains classes for domain analysis.</p>
+<h2>Classes for domain name analysis.</h2>
for information please refer to following urls :
<ul>
Added: nutch/branches/2.x/src/java/org/apache/nutch/util/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/package-info.java (added)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Miscellaneous utility classes.
+ */
+package org.apache.nutch.util;
Propchange: nutch/branches/2.x/src/java/org/apache/nutch/util/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Indexing filter to index meta data from RSS feeds.
+ */
+package org.apache.nutch.indexer.feed;
Propchange: nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse RSS feeds.
+ */
+package org.apache.nutch.parse.feed;
Propchange: nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html (original)
+++ nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
<html>
<body>
-<p>A basic indexing plugin.</p><p></p>
+<p>A basic indexing plugin, adds basic fields: url, host, title, content, etc.</p><p></p>
</body>
</html>
Added: nutch/branches/2.x/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Indexing filter to add document metadata to the index.
+ * Metadata may come from CrawlDb, parse or content metadata.
+ */
+package org.apache.nutch.indexer.metadata;
Propchange: nutch/branches/2.x/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html (original)
+++ nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,6 @@
<html>
<body>
-<p>A more indexing plugin.</p><p></p>
+<p>A more indexing plugin, adds "more" index fields:
+last modified date, MIME type, content length.</p><p></p>
</body>
</html>
Added: nutch/branches/2.x/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Index writer plugin for <a href="http://www.elasticsearch.org/">Elasticsearch</a>.
+ */
+package org.apache.nutch.indexwriter.elastic;
Propchange: nutch/branches/2.x/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Index writer plugin for <a href="http://lucene.apache.org/solr/">Apache Solr</a>.
+ */
+package org.apache.nutch.indexwriter.solr;
Propchange: nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Generic {@link org.apache.nutch.net.URLFilter URL filter} library,
+ * abstracting away from regular expression implementations.
+ */
+package org.apache.nutch.urlfilter.api;
+
Propchange: nutch/branches/2.x/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse wrapper to run external command to do the parsing.
+ */
+package org.apache.nutch.parse.ext;
Propchange: nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parser and parse filter plugin to extract all (possible) links
+ * from JavaScript files and embedded JavaScript code snippets.
+ */
+package org.apache.nutch.parse.js;
Propchange: nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/branches/2.x/src/plugin/parse-metatags/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-metatags/plugin.xml?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-metatags/plugin.xml (original)
+++ nutch/branches/2.x/src/plugin/parse-metatags/plugin.xml Tue Jun 24 21:41:28 2014
@@ -15,7 +15,7 @@
name="MetaTags Parser"
point="org.apache.nutch.parse.ParseFilter">
<implementation id="MetaTagsParser"
- class="org.apache.nutch.parse.MetaTagsParser"/>
+ class="org.apache.nutch.parse.metatags.MetaTagsParser"/>
</extension>
</plugin>
Added: nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java (added)
+++ nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,149 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.parse.metatags;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.avro.util.Utf8;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.parse.HTMLMetaTags;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseFilter;
+import org.apache.nutch.storage.WebPage;
+import org.apache.nutch.storage.WebPage.Field;
+import org.apache.nutch.util.Bytes;
+import org.w3c.dom.DocumentFragment;
+
+/**
+ * Parse HTML meta tags (keywords, description) and store them in the parse
+ * metadata so that they can be indexed with the index-metadata plugin with the
+ * prefix 'metatag.'
+ */
+public class MetaTagsParser implements ParseFilter {
+
+ private static final Log LOG = LogFactory.getLog(MetaTagsParser.class
+ .getName());
+
+ private Configuration conf;
+
+ public static final String PARSE_META_PREFIX = "meta_";
+
+ private Set<String> metatagset = new HashSet<String>();
+
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ // specify whether we want a specific subset of metadata
+ // by default take everything we can find
+ String metatags = conf.get("metatags.names", "*");
+ String[] values = metatags.split(";");
+ for (String val : values)
+ metatagset.add(val.toLowerCase());
+ if(metatagset.size()==0){
+ metatagset.add("*");
+ }
+ }
+
+ public Configuration getConf() {
+ return this.conf;
+ }
+
+ public Parse filter(String url, WebPage page, Parse parse,
+ HTMLMetaTags metaTags, DocumentFragment doc) {
+
+ Map<Utf8, ByteBuffer> metadata = new HashMap<Utf8, ByteBuffer>();
+
+ // check in the metadata first : the tika-parser
+ // might have stored the values there already
+ Iterator<Entry<CharSequence, ByteBuffer>> iterator = page.getMetadata().entrySet().iterator();
+ while (iterator.hasNext()) {
+ Entry<CharSequence, ByteBuffer> entry = iterator.next();
+ String mdName = entry.getKey().toString();
+ String value = Bytes.toStringBinary(entry.getValue());
+ if (metatagset.contains("*") || metatagset.contains(mdName.toLowerCase())) {
+ // now add the metadata
+ LOG.debug("Found meta tag: '" + mdName + "', with value: '" + value
+ + "'");
+ metadata.put(new Utf8(PARSE_META_PREFIX + mdName.toLowerCase()),
+ ByteBuffer.wrap(value.getBytes()));
+ }
+ }
+ Iterator<Entry<Utf8, ByteBuffer>> itm = metadata.entrySet().iterator();
+ while (iterator.hasNext()) {
+ Entry<Utf8, ByteBuffer> entry = itm.next();
+ page.getMetadata().put(entry.getKey(), entry.getValue());
+ }
+
+ Properties generalMetaTags = metaTags.getGeneralTags();
+ Iterator<Object> it = generalMetaTags.keySet().iterator();
+ while (it.hasNext()) {
+ StringBuilder sb = new StringBuilder();
+ String name = (String) it.next();
+ String[] values = new String[] { (String) generalMetaTags.get(name) };
+ // The multivalues of a metadata field are saved with a separator '\t'
+ // in the storage
+ // unless there is only one entry, where no \t is appended.
+ for (String value : values) {
+ if (values.length > 1) {
+ sb.append(value).append("\t");
+ } else {
+ sb.append(value);
+ }
+ }
+ // check whether the name is in the list of what we want or if
+ // specified *
+ if (metatagset.contains("*") || metatagset.contains(name.toLowerCase())) {
+ // Add the recently parsed value of multiValued array to metadata
+ LOG.debug("Found meta tag : " + name + "\t" + sb.toString());
+ page.getMetadata().put(new Utf8(PARSE_META_PREFIX + name.toLowerCase()),
+ ByteBuffer.wrap(Bytes.toBytes(sb.toString())));
+ }
+ }
+
+ Properties httpequiv = metaTags.getHttpEquivTags();
+ Enumeration<?> tagNames = httpequiv.propertyNames();
+ while (tagNames.hasMoreElements()) {
+ String name = (String) tagNames.nextElement();
+ String value = httpequiv.getProperty(name);
+ // check whether the name is in the list of what we want or if
+ // specified *
+ if (metatagset.contains("*") || metatagset.contains(name.toLowerCase())) {
+ LOG.debug("Found meta tag : " + name + "\t" + value);
+ page.getMetadata().put(new Utf8(PARSE_META_PREFIX + name.toLowerCase()),
+ ByteBuffer.wrap(value.getBytes()));
+ }
+ }
+
+ return parse;
+ }
+
+ @Override
+ public Collection<Field> getFields() {
+ return null;
+ }
+
+}
Propchange: nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse filter to extract meta tags: keywords, description, etc.
+ * Used in combination with the index-metadata plugin
+ * (see {@link org.apache.nutch.indexer.metadata}).
+ */
+package org.apache.nutch.parse.metatags;
Propchange: nutch/branches/2.x/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Copied: nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetaTagsParser.java (from r1605193, nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java)
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetaTagsParser.java?p2=nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetaTagsParser.java&p1=nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java&r1=1605193&r2=1605204&rev=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java (original)
+++ nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetaTagsParser.java Tue Jun 24 21:41:28 2014
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nutch.parse;
+package org.apache.nutch.parse.metatags;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
@@ -26,6 +26,7 @@ import org.apache.html.dom.HTMLDocumentI
import org.apache.nutch.parse.HTMLMetaTags;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseUtil;
+import org.apache.nutch.parse.metatags.MetaTagsParser;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.Bytes;
import org.apache.nutch.util.NutchConfiguration;
Added: nutch/branches/2.x/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse Flash SWF files.
+ */
+package org.apache.nutch.parse.swf;
Propchange: nutch/branches/2.x/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse various document formats with help of
+ * <a href="http://tika.apache.org/">Apache Tika</a>.
+ */
+package org.apache.nutch.parse.tika;
Propchange: nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse ZIP files: embedded files are recursively passed to appropriate parsers.
+ */
+package org.apache.nutch.parse.zip;
Propchange: nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring filter used in conjunction with
+ * {@link org.apache.nutch.scoring.webgraph.WebGraph}.
+ */
+package org.apache.nutch.scoring.link;
Propchange: nutch/branches/2.x/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring filter implementing a variant of the Online Page Importance Computation
+ * (OPIC) algorithm.
+ */
+package org.apache.nutch.scoring.opic;
Propchange: nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Indexing filter to assign documents to subcollections.
+ * The field "subcollection" is added and filled with a collection name
+ * defined in a configuration file and selected by pattern, see
+ * {@link org.apache.nutch.collection}.
+ */
+package org.apache.nutch.indexer.subcollection;
Propchange: nutch/branches/2.x/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/branches/2.x/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html (original)
+++ nutch/branches/2.x/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html Tue Jun 24 21:41:28 2014
@@ -1,7 +1,7 @@
<html>
<body>
<p>
-A url filter plugin based on
+URL filter plugin based on
<a href="http://www.brics.dk/automaton/">dk.brics.automaton</a> Finite-State
Automata for Java<sup>TM</sup>.
</p>
Added: nutch/branches/2.x/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL filter plugin to include only URLs which match an element in a given list of
+ * domain suffixes, domain names, and/or host names.
+ * See {@link org.apache.nutch.urlfilter.domainblacklist} for the counterpart
+ * (exclude URLs by host or domain).
+ */
+package org.apache.nutch.urlfilter.domain;
+
Propchange: nutch/branches/2.x/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/branches/2.x/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html (original)
+++ nutch/branches/2.x/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
<html>
<body>
-<p>A url filter plugin.</p><p></p>
+<p>URL filter plugin to include only URLs which match one of a given list of URL prefixes.</p>
</body>
</html>
Modified: nutch/branches/2.x/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html (original)
+++ nutch/branches/2.x/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
<html>
<body>
-<p>A url filter plugin.</p><p></p>
+<p>URL filter plugin to include and/or exclude URLs matching Java regular expressions.</p>
</body>
</html>
Added: nutch/branches/2.x/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL filter plugin to either exclude or include only URLs which match
+ * one of the given (path) suffixes.
+ */
+package org.apache.nutch.urlfilter.suffix;
+