You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2013/06/20 11:11:14 UTC
svn commit: r1494895 - in /nutch/trunk: CHANGES.txt conf/nutch-default.xml
Author: markus
Date: Thu Jun 20 09:11:13 2013
New Revision: 1494895
URL: http://svn.apache.org/r1494895
Log:
NUTCH-1583 Headings plugin to support multivalued headings
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/nutch-default.xml
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1494895&r1=1494894&r2=1494895&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jun 20 09:11:13 2013
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk): Current Development
+* NUTCH-1583 Headings plugin to support multivalued headings (markus)
+
* NUTCH-1245 URL gone with 404 after db.fetch.interval.max stays db_unfetched in CrawlDb (snagel)
* NUTCH-1527 Elasticsearch indexer (lufeng + markus)
Modified: nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1494895&r1=1494894&r2=1494895&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Thu Jun 20 09:11:13 2013
@@ -1463,4 +1463,18 @@
</description>
</property>
+<!-- Headings plugin properties -->
+
+<property>
+ <name>headings</name>
+ <value>h1,h2</value>
+ <description>Comma-separated list of headings to retrieve from the document.</description>
+</property>
+
+<property>
+ <name>headings.multivalued</name>
+ <value>false</value>
+ <description>Whether to support multivalued headings.</description>
+</property>
+
</configuration>