You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2013/06/20 11:11:14 UTC

svn commit: r1494895 - in /nutch/trunk: CHANGES.txt conf/nutch-default.xml

Author: markus
Date: Thu Jun 20 09:11:13 2013
New Revision: 1494895

URL: http://svn.apache.org/r1494895
Log:
NUTCH-1583 Headings plugin to support multivalued headings

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/conf/nutch-default.xml

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1494895&r1=1494894&r2=1494895&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jun 20 09:11:13 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk): Current Development
 
+* NUTCH-1583 Headings plugin to support multivalued headings (markus)
+
 * NUTCH-1245 URL gone with 404 after db.fetch.interval.max stays db_unfetched in CrawlDb (snagel)
 
 * NUTCH-1527 Elasticsearch indexer (lufeng + markus)

Modified: nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1494895&r1=1494894&r2=1494895&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Thu Jun 20 09:11:13 2013
@@ -1463,4 +1463,18 @@
   </description>
 </property>
 
+<!-- Headings plugin properties -->
+
+<property>
+  <name>headings</name>
+  <value>h1,h2</value>
+  <description>Comma-separated list of heading tags (e.g. h1,h2) to retrieve from the document.</description>
+</property>
+
+<property>
+  <name>headings.multivalued</name>
+  <value>false</value>
+  <description>Whether to support multivalued headings.</description>
+</property>
+
 </configuration>