You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2014/06/24 23:41:31 UTC
svn commit: r1605204 [2/3] - in /nutch: branches/2.x/
branches/2.x/src/java/org/apache/nutch/api/
branches/2.x/src/java/org/apache/nutch/api/impl/
branches/2.x/src/java/org/apache/nutch/crawl/
branches/2.x/src/java/org/apache/nutch/host/ branches/2.x/s...
Propchange: nutch/branches/2.x/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/branches/2.x/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html (original)
+++ nutch/branches/2.x/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html Tue Jun 24 21:41:28 2014
@@ -1,6 +1,6 @@
<html>
<body>
-<p>A url filter plugin that validates given urls.</p>
+<p>URL filter plugin that validates given urls.</p>
<p>This plugin runs a series of tests for the given url to make sure that given
url is valid and 'fetchable'.</p>
<p>Note: This plugin should <b>only</b> be used for web-related protocols such
Modified: nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java (original)
+++ nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java Tue Jun 24 21:41:28 2014
@@ -31,7 +31,13 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.conf.Configured;
import org.apache.oro.text.regex.*;
-/** Converts URLs to a normal form . */
+/**
+ * Converts URLs to a normal form:
+ * <ul>
+ * <li>remove dot segments in path: <code>/./</code> or <code>/../</code></li>
+ * <li>remove default ports, e.g. 80 for protocol <code>http://</code></li>
+ * </ul>
+ */
public class BasicURLNormalizer extends Configured implements URLNormalizer {
public static final Logger LOG = LoggerFactory.getLogger(BasicURLNormalizer.class);
Added: nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL normalizer performing basic normalizations: remove default ports
+ * and dot segments in path.
+ */
+package org.apache.nutch.net.urlnormalizer.basic;
Propchange: nutch/branches/2.x/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL normalizer dummy which does not change URLs. Required because at least
+ * one URL normalizer must be defined in any scope.
+ */
+package org.apache.nutch.net.urlnormalizer.pass;
Propchange: nutch/branches/2.x/src/plugin/urlnormalizer-pass/src/java/org/apache/nutch/net/urlnormalizer/pass/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java (added)
+++ nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL normalizer with configurable rules based on regular expressions
+ * ({@link java.util.regex.Pattern}).
+ */
+package org.apache.nutch.net.urlnormalizer.regex;
Propchange: nutch/branches/2.x/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 24 21:41:28 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-1787 update and complete API doc overview page (snagel)
+
* NUTCH-1767 remove special treatment of "params" in relative links (snagel)
* NUTCH-1718 redefine http.robots.agent as "additional agent names" (snagel, Tejas Patil, Daniel Kugel)
Modified: nutch/trunk/build.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Tue Jun 24 21:41:28 2014
@@ -172,6 +172,8 @@
<packageset dir="${plugins.dir}/index-metadata/src/java"/>
<packageset dir="${plugins.dir}/index-more/src/java"/>
<packageset dir="${plugins.dir}/index-static/src/java"/>
+ <packageset dir="${plugins.dir}/indexer-dummy/src/java"/>
+ <packageset dir="${plugins.dir}/indexer-elastic/src/java/" />
<packageset dir="${plugins.dir}/indexer-solr/src/java"/>
<packageset dir="${plugins.dir}/language-identifier/src/java"/>
<packageset dir="${plugins.dir}/lib-http/src/java"/>
@@ -184,11 +186,11 @@
<packageset dir="${plugins.dir}/parse-swf/src/java"/>
<packageset dir="${plugins.dir}/parse-tika/src/java"/>
<packageset dir="${plugins.dir}/parse-zip/src/java"/>
- <packageset dir="${plugins.dir}/lib-http/src/java"/>
<packageset dir="${plugins.dir}/protocol-file/src/java"/>
<packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
<packageset dir="${plugins.dir}/protocol-http/src/java"/>
<packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
+ <packageset dir="${plugins.dir}/scoring-depth/src/java"/>
<packageset dir="${plugins.dir}/scoring-link/src/java"/>
<packageset dir="${plugins.dir}/scoring-opic/src/java"/>
<packageset dir="${plugins.dir}/subcollection/src/java"/>
@@ -198,12 +200,13 @@
<packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
- <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
<packageset dir="${plugins.dir}/urlmeta/src/java"/>
<packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
+ <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/>
<packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+ <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/>
<packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
<link href="${javadoc.link.java}"/>
@@ -221,8 +224,10 @@
<group title="Plugins API" packages="${plugins.api}"/>
<group title="Protocol Plugins" packages="${plugins.protocol}"/>
<group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
+ <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/>
<group title="Scoring Plugins" packages="${plugins.scoring}"/>
<group title="Parse Plugins" packages="${plugins.parse}"/>
+ <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/>
<group title="Indexing Filter Plugins" packages="${plugins.index}"/>
<group title="Indexer Plugins" packages="${plugins.indexer}"/>
<group title="Misc. Plugins" packages="${plugins.misc}"/>
@@ -572,6 +577,8 @@
<packageset dir="${plugins.dir}/index-metadata/src/java"/>
<packageset dir="${plugins.dir}/index-more/src/java"/>
<packageset dir="${plugins.dir}/index-static/src/java"/>
+ <packageset dir="${plugins.dir}/indexer-dummy/src/java"/>
+ <packageset dir="${plugins.dir}/indexer-elastic/src/java/" />
<packageset dir="${plugins.dir}/indexer-solr/src/java"/>
<packageset dir="${plugins.dir}/language-identifier/src/java"/>
<packageset dir="${plugins.dir}/lib-http/src/java"/>
@@ -588,6 +595,7 @@
<packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
<packageset dir="${plugins.dir}/protocol-http/src/java"/>
<packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
+ <packageset dir="${plugins.dir}/scoring-depth/src/java"/>
<packageset dir="${plugins.dir}/scoring-link/src/java"/>
<packageset dir="${plugins.dir}/scoring-opic/src/java"/>
<packageset dir="${plugins.dir}/subcollection/src/java"/>
@@ -601,7 +609,9 @@
<packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
<packageset dir="${plugins.dir}/urlmeta/src/java"/>
<packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
+ <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/>
<packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+ <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/>
<packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
<link href="${javadoc.link.java}"/>
@@ -619,8 +629,10 @@
<group title="Plugins API" packages="${plugins.api}"/>
<group title="Protocol Plugins" packages="${plugins.protocol}"/>
<group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
+ <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/>
<group title="Scoring Plugins" packages="${plugins.scoring}"/>
<group title="Parse Plugins" packages="${plugins.parse}"/>
+ <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/>
<group title="Indexing Filter Plugins" packages="${plugins.index}"/>
<group title="Indexer Plugins" packages="${plugins.indexer}"/>
<group title="Misc. Plugins" packages="${plugins.misc}"/>
@@ -944,9 +956,9 @@
<source path="${basedir}/src/plugin/index-anchor/src/test/" />
<source path="${basedir}/src/plugin/index-basic/src/java/" />
<source path="${basedir}/src/plugin/index-basic/src/test/" />
+ <source path="${basedir}/src/plugin/indexer-dummy/src/java/" />
<source path="${basedir}/src/plugin/indexer-solr/src/java/" />
<source path="${basedir}/src/plugin/indexer-elastic/src/java/" />
- <source path="${basedir}/src/plugin/indexer-dummy/src/java/" />
<source path="${basedir}/src/plugin/index-metadata/src/java/" />
<source path="${basedir}/src/plugin/index-more/src/java/" />
<source path="${basedir}/src/plugin/index-more/src/test/" />
Modified: nutch/trunk/default.properties
URL: http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/default.properties (original)
+++ nutch/trunk/default.properties Tue Jun 24 21:41:28 2014
@@ -97,22 +97,25 @@ plugins.urlfilter=\
org.apache.nutch.urlfilter.domain*:\
org.apache.nutch.urlfilter.domainblacklist*:\
org.apache.nutch.urlfilter.prefix*:\
- org.apache.nutch.urlfilter.regex*\
+ org.apache.nutch.urlfilter.regex*:\
org.apache.nutch.urlfilter.suffix*:\
org.apache.nutch.urlfilter.validator*
#
# URL Normalizer Plugins
#
-plugins.urlfilter=\
+plugins.urlnormalizer=\
org.apache.nutch.net.urlnormalizer.basic*:\
+ org.apache.nutch.net.urlnormalizer.host*:\
org.apache.nutch.net.urlnormalizer.pass*:\
+ org.apache.nutch.net.urlnormalizer.querystring*:\
org.apache.nutch.net.urlnormalizer.regex*
#
# Scoring Plugins
#
plugins.scoring=\
+ org.apache.nutch.scoring.depth*:\
org.apache.nutch.scoring.link*:\
org.apache.nutch.scoring.opic*:\
org.apache.nutch.scoring.tld*:\
@@ -133,8 +136,9 @@ plugins.parse=\
#
# Parse Filter Plugins
#
-plugins.parse=\
- org.apache.nutch.parse.headings*
+plugins.parsefilter=\
+ org.apache.nutch.parse.headings*:\
+ org.apache.nutch.parse.metatags*
#
# Indexing Filter Plugins
@@ -144,6 +148,7 @@ plugins.index=\
org.apache.nutch.indexer.basic*:\
org.apache.nutch.indexer.feed*:\
org.apache.nutch.indexer.metadata*:\
+ org.apache.nutch.indexer.more*:\
org.apache.nutch.indexer.static*:\
org.apache.nutch.indexer.subcollection*:\
org.apache.nutch.indexer.tld*:\
@@ -153,18 +158,20 @@ plugins.index=\
# Indexing Backend Plugins
#
plugins.indexer=\
+ org.apache.nutch.indexwriter.dummy*:\
+ org.apache.nutch.indexwriter.elastic*:\
org.apache.nutch.indexwriter.solr*
#
# Misc. Plugins
#
# (gathers plugins that cannot be dispatched
-# in any category, mainly because they contains
+# in any category, mainly because they contain
# many extension points)
#
plugins.misc=\
org.apache.nutch.collection*:\
org.apache.nutch.analysis.lang*:\
- org.creativecommons.nutch*
- org.apache.nutch.microformats.reltag*:\
+ org.creativecommons.nutch*:\
+ org.apache.nutch.microformats.reltag*
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/package.html (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
<html>
<body>
-Crawl control code.
+Crawl control code and tools to run the crawler.
</body>
</html>
Modified: nutch/trunk/src/java/org/apache/nutch/indexer/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/package.html (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,10 @@
<html>
<body>
-Maintain Lucene full-text indexes.
+Index content, configure and run indexing and cleaning jobs to
+add, update, and delete documents from an index. Two tasks are
+delegated to plugins:
+<ul>
+<li>indexing filters fill index fields of each document</li>
+<li>index writer plugins send documents to index back-ends (Solr, etc.).</li></ul>
</body>
</html>
Added: nutch/trunk/src/java/org/apache/nutch/net/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/net/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/net/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Web-related interfaces: URL {@link org.apache.nutch.net.URLFilter filters}
+ * and {@link org.apache.nutch.net.URLNormalizer normalizers}.
+ */
+package org.apache.nutch.net;
Propchange: nutch/trunk/src/java/org/apache/nutch/net/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/net/protocols/Response.java Tue Jun 24 21:41:28 2014
@@ -25,7 +25,7 @@ import org.apache.nutch.metadata.Metadat
/**
- * A response inteface. Makes all protocols model HTTP.
+ * A response interface. Makes all protocols model HTTP.
*/
public interface Response extends HttpHeaders {
Added: nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Helper classes related to the {@link org.apache.nutch.protocol.Protocol Protocol}
+ * interface, see also {@link org.apache.nutch.protocol}.
+ */
+package org.apache.nutch.net.protocols;
Propchange: nutch/trunk/src/java/org/apache/nutch/net/protocols/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/parse/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/parse/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The {@link org.apache.nutch.parse.Parse Parse} interface and related classes.
+ */
+package org.apache.nutch.parse;
Propchange: nutch/trunk/src/java/org/apache/nutch/parse/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Classes related to the {@link org.apache.nutch.protocol.Protocol Protocol} interface,
+ * see also {@link org.apache.nutch.net.protocols}.
+ */
+package org.apache.nutch.protocol;
Propchange: nutch/trunk/src/java/org/apache/nutch/protocol/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The {@link org.apache.nutch.scoring.ScoringFilter ScoringFilter} interface.
+ */
+package org.apache.nutch.scoring;
Propchange: nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring implementation based on link analysis
+ * ({@link org.apache.nutch.scoring.webgraph.LinkRank}),
+ * see {@link org.apache.nutch.scoring.webgraph.WebGraph}.
+ */
+package org.apache.nutch.scoring.webgraph;
Propchange: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/segment/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/segment/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A segment stores all data from one generate/fetch/update cycle:
+ * fetch list, protocol status, raw content, parsed content, and extracted outgoing links.
+ */
+package org.apache.nutch.segment;
Propchange: nutch/trunk/src/java/org/apache/nutch/segment/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tools to read the
+ * <a href="http://archive.org/web/researcher/ArcFileFormat.php">Arc file format</a>.
+ */
+package org.apache.nutch.tools.arc;
Propchange: nutch/trunk/src/java/org/apache/nutch/tools/arc/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/tools/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/tools/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Miscellaneous tools.
+ */
+package org.apache.nutch.tools;
Propchange: nutch/trunk/src/java/org/apache/nutch/tools/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Proxy to {@link org.apache.nutch.tools.Benchmark benchmark} the crawler.
+ */
+package org.apache.nutch.tools.proxy;
Propchange: nutch/trunk/src/java/org/apache/nutch/tools/proxy/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/java/org/apache/nutch/util/domain/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/domain/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/domain/package.html (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/domain/package.html Tue Jun 24 21:41:28 2014
@@ -1,8 +1,6 @@
<html>
<body>
-<h2> org.apache.nutch.util.domain</h2>
-
-<p>This package contains classes for domain analysis.</p>
+<h2>Classes for domain name analysis.</h2>
For more information, please refer to the following URLs:
<ul>
Added: nutch/trunk/src/java/org/apache/nutch/util/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/package-info.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/util/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Miscellaneous utility classes.
+ */
+package org.apache.nutch.util;
Propchange: nutch/trunk/src/java/org/apache/nutch/util/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/java/overview.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/overview.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/java/overview.html (original)
+++ nutch/trunk/src/java/overview.html Tue Jun 24 21:41:28 2014
@@ -3,7 +3,7 @@
<title>Apache Nutch</title>
</head>
<body>
-<p>Apache Nutch is an open source web-search software project. </p>
+<p>Apache Nutch is a highly extensible and scalable open source web crawler software project.</p>
<p>Nutch is a project of the Apache Software Foundation and is part of the larger Apache community of developers and users.</p>
</body>
</html>
Added: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java (added)
+++ nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Indexing filter to index meta data from RSS feeds.
+ */
+package org.apache.nutch.indexer.feed;
Propchange: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java (added)
+++ nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse RSS feeds.
+ */
+package org.apache.nutch.parse.feed;
Propchange: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java (added)
+++ nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse filter to extract headings (h1, h2, etc.) from DOM parse tree.
+ */
+package org.apache.nutch.parse.headings;
Propchange: nutch/trunk/src/plugin/headings/src/java/org/apache/nutch/parse/headings/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html (original)
+++ nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
<html>
<body>
-<p>A basic indexing plugin.</p><p></p>
+<p>A basic indexing plugin, adds basic fields: url, host, title, content, etc.</p><p></p>
</body>
</html>
Added: nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java (added)
+++ nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Indexing filter to add document metadata to the index.
+ * Metadata may come from CrawlDb, parse or content metadata.
+ */
+package org.apache.nutch.indexer.metadata;
Propchange: nutch/trunk/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html (original)
+++ nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,6 @@
<html>
<body>
-<p>A more indexing plugin.</p><p></p>
+<p>An indexing plugin that adds "more" index fields:
+last modified date, MIME type, content length.</p><p></p>
</body>
</html>
Added: nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java (added)
+++ nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Index writer plugin for debugging, writes pairs of &lt;action, url&gt; to a
+ * text file, action is one of "add", "update", or "delete".
+ */
+package org.apache.nutch.indexwriter.dummy;
Propchange: nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java (added)
+++ nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Index writer plugin for <a href="http://www.elasticsearch.org/">Elasticsearch</a>.
+ */
+package org.apache.nutch.indexwriter.elastic;
Propchange: nutch/trunk/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java (added)
+++ nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Index writer plugin for <a href="http://lucene.apache.org/solr/">Apache Solr</a>.
+ */
+package org.apache.nutch.indexwriter.solr;
Propchange: nutch/trunk/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java (added)
+++ nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Generic {@link org.apache.nutch.net.URLFilter URL filter} library,
+ * abstracting away from regular expression implementations.
+ */
+package org.apache.nutch.urlfilter.api;
+
Propchange: nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse wrapper to run external command to do the parsing.
+ */
+package org.apache.nutch.parse.ext;
Propchange: nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parser and parse filter plugin to extract all (possible) links
+ * from JavaScript files and embedded JavaScript code snippets.
+ */
+package org.apache.nutch.parse.js;
Propchange: nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/plugin/parse-metatags/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-metatags/plugin.xml?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-metatags/plugin.xml (original)
+++ nutch/trunk/src/plugin/parse-metatags/plugin.xml Tue Jun 24 21:41:28 2014
@@ -15,7 +15,7 @@
name="MetaTags Parser"
point="org.apache.nutch.parse.HtmlParseFilter">
<implementation id="MetaTagsParser"
- class="org.apache.nutch.parse.MetaTagsParser"/>
+ class="org.apache.nutch.parse.metatags.MetaTagsParser"/>
</extension>
</plugin>
Added: nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java (added)
+++ nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.parse.metatags;
+
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.parse.HTMLMetaTags;
+import org.apache.nutch.parse.HtmlParseFilter;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseResult;
+import org.apache.nutch.protocol.Content;
+import org.w3c.dom.DocumentFragment;
+
+/**
+ * HTML parse filter that copies meta tag values into the parse metadata under
+ * keys prefixed with 'metatag.', so that they can be indexed with the
+ * index-metadata plugin.
+ *
+ * <p>Configuration: the property <code>metatags.names</code> holds a
+ * ';'-separated list of tag names to keep. Names are lower-cased when they
+ * are stored, and the default value <code>*</code> keeps every tag found.</p>
+ *
+ * <p>{@link #filter} looks at three sources in turn: the names already present
+ * in the parse metadata, the general meta tags and the http-equiv tags of the
+ * supplied {@link HTMLMetaTags}. Each wanted name is lower-cased, prefixed
+ * with 'metatag.' and added to the parse metadata of the parse registered
+ * under the content URL; the incoming {@link ParseResult} is returned.</p>
+ *
+ * <p>NOTE(review): the list entries are not trimmed, lower-casing uses the
+ * default locale, {@link #setConf} adds to the name set without clearing it,
+ * and the parse looked up in <code>filter</code> is not checked for null --
+ * confirm these are acceptable for all callers.</p>
+ */
+public class MetaTagsParser implements HtmlParseFilter {
+
+ private static final Log LOG = LogFactory.getLog(MetaTagsParser.class
+ .getName());
+
+ private Configuration conf;
+
+ private Set<String> metatagset = new HashSet<String>();
+
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ // metatags.names selects the subset of tag names to keep (';'-separated);
+ // the default "*" means: take everything we can find
+ String metatags = conf.get("metatags.names", "*");
+ String[] values = metatags.split(";");
+ for (String val : values)
+ metatagset.add(val.toLowerCase());
+ }
+
+ public Configuration getConf() {
+ return this.conf;
+ }
+
+ public ParseResult filter(Content content, ParseResult parseResult,
+ HTMLMetaTags metaTags, DocumentFragment doc) {
+
+ Parse parse = parseResult.get(content.getUrl());
+ Metadata metadata = parse.getData().getParseMeta();
+
+ // check the existing parse metadata first: the tika-parser
+ // might have stored the values there already
+
+ for (String mdName : metadata.names()) {
+ String value = metadata.get(mdName);
+ // keep the entry only if its name is in the configured list or the
+ // list contains the wildcard *
+ if (metatagset.contains("*") || metatagset.contains(mdName.toLowerCase())) {
+ LOG.debug("Found meta tag : " + mdName + "\t" + value);
+ metadata.add("metatag." + mdName.toLowerCase(), value);
+ }
+ }
+
+ Metadata generalMetaTags = metaTags.getGeneralTags();
+ for (String tagName : generalMetaTags.names() ) {
+ String[] tagValues = generalMetaTags.getValues(tagName);
+
+ for ( String tagValue : tagValues ) {
+ // same name check as above, applied to every value of a general
+ // meta tag
+ if (metatagset.contains("*") || metatagset.contains(tagName.toLowerCase())) {
+ LOG.debug("Found meta tag : " + tagName + "\t" + tagValue);
+ metadata.add("metatag." + tagName.toLowerCase(), tagValue);
+ }
+ }
+ }
+
+ Properties httpequiv = metaTags.getHttpEquivTags();
+ for (Enumeration tagNames = httpequiv.propertyNames(); tagNames
+ .hasMoreElements();) {
+ String name = (String) tagNames.nextElement();
+ String value = httpequiv.getProperty(name);
+ // same name check as above, applied to the http-equiv properties
+ // (one value per property name)
+ if (metatagset.contains("*") || metatagset.contains(name.toLowerCase())) {
+ LOG.debug("Found meta tag : " + name + "\t" + value);
+ metadata.add("metatag." + name.toLowerCase(), value);
+ }
+ }
+
+ return parseResult;
+ }
+
+}
Propchange: nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/MetaTagsParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse filter to extract meta tags: keywords, description, etc.
+ * Used in combination with index-metadata plugin
+ * (see {@link org.apache.nutch.indexer.metadata}).
+ */
+package org.apache.nutch.parse.metatags;
Propchange: nutch/trunk/src/plugin/parse-metatags/src/java/org/apache/nutch/parse/metatags/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetatagParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetatagParser.java?rev=1605204&r1=1605150&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetatagParser.java (original)
+++ nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/metatags/TestMetatagParser.java Tue Jun 24 21:41:28 2014
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nutch.parse.html;
+package org.apache.nutch.parse.metatags;
import java.util.Set;
import java.util.TreeSet;
Added: nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse Flash SWF files.
+ */
+package org.apache.nutch.parse.swf;
Propchange: nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse various document formats with the help of
+ * <a href="http://tika.apache.org/">Apache Tika</a>.
+ */
+package org.apache.nutch.parse.tika;
Propchange: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java (added)
+++ nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Parse ZIP files: embedded files are recursively passed to appropriate parsers.
+ */
+package org.apache.nutch.parse.zip;
Propchange: nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java (original)
+++ nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/DepthScoringFilter.java Tue Jun 24 21:41:28 2014
@@ -22,7 +22,7 @@ import org.apache.nutch.scoring.ScoringF
/**
* This scoring filter limits the number of hops from the initial seed urls. If
- * the numbe of hops exceeds the depth (either the default value, or the one
+ * the number of hops exceeds the depth (either the default value, or the one
* set in the injector file) then all outlinks from that url are discarded,
* effectively stopping further crawling along this path.
*/
Added: nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java (added)
+++ nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring filter to stop crawling at a configurable depth
+ * (number of "hops" from seed URLs).
+ */
+package org.apache.nutch.scoring.depth;
Propchange: nutch/trunk/src/plugin/scoring-depth/src/java/org/apache/nutch/scoring/depth/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java (added)
+++ nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring filter used in conjunction with
+ * {@link org.apache.nutch.scoring.webgraph.WebGraph}.
+ */
+package org.apache.nutch.scoring.link;
Propchange: nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java (added)
+++ nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Scoring filter implementing a variant of the Online Page Importance Computation
+ * (OPIC) algorithm.
+ */
+package org.apache.nutch.scoring.opic;
Propchange: nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java (added)
+++ nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Indexing filter to assign documents to subcollections.
+ * The field "subcollection" is added and filled with a collection name
+ * defined in a configuration file and selected by pattern, see
+ * {@link org.apache.nutch.collection}.
+ */
+package org.apache.nutch.indexer.subcollection;
Propchange: nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html (original)
+++ nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/package.html Tue Jun 24 21:41:28 2014
@@ -1,7 +1,7 @@
<html>
<body>
<p>
-A url filter plugin based on
+URL filter plugin based on
<a href="http://www.brics.dk/automaton/">dk.brics.automaton</a> Finite-State
Automata for Java<sup>TM</sup>.
</p>
Added: nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java (added)
+++ nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL filter plugin to include only URLs which match an element in a given list of
+ * domain suffixes, domain names, and/or host names.
+ * See {@link org.apache.nutch.urlfilter.domainblacklist} for the counterpart
+ * (exclude URLs by host or domain).
+ */
+package org.apache.nutch.urlfilter.domain;
+
Propchange: nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java (added)
+++ nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL filter plugin to exclude URLs by domain suffixes, domain names, and/or host names.
+ * See {@link org.apache.nutch.urlfilter.domain} for the counterpart (include only URLs
+ * matching host or domain).
+ */
+package org.apache.nutch.urlfilter.domainblacklist;
+
Propchange: nutch/trunk/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html (original)
+++ nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
<html>
<body>
-<p>A url filter plugin.</p><p></p>
+<p>URL filter plugin to include only URLs which match one of a given list of URL prefixes.</p>
</body>
</html>
Modified: nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html (original)
+++ nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/package.html Tue Jun 24 21:41:28 2014
@@ -1,5 +1,5 @@
<html>
<body>
-<p>A url filter plugin.</p><p></p>
+<p>URL filter plugin to include and/or exclude URLs matching Java regular expressions.</p>
</body>
</html>
Added: nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java?rev=1605204&view=auto
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java (added)
+++ nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java Tue Jun 24 21:41:28 2014
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * URL filter plugin to either exclude or include only URLs which match
+ * one of the given (path) suffixes.
+ */
+package org.apache.nutch.urlfilter.suffix;
+
Propchange: nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: nutch/trunk/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html?rev=1605204&r1=1605203&r2=1605204&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html (original)
+++ nutch/trunk/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html Tue Jun 24 21:41:28 2014
@@ -1,6 +1,6 @@
<html>
<body>
-<p>A url filter plugin that validates given urls.</p>
+<p>URL filter plugin that validates given URLs.</p>
<p>This plugin runs a series of tests for the given url to make sure that given
url is valid and 'fetchable'.</p>
<p>Note: This plugin should <b>only</b> be used for web-related protocols such