You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/04/24 01:55:09 UTC

svn commit: r1675735 - /nutch/trunk/conf/nutch-default.xml

Author: lewismc
Date: Thu Apr 23 23:55:09 2015
New Revision: 1675735

URL: http://svn.apache.org/r1675735
Log:
Add the NUTCH-1927 property back into nutch-default, as it was removed during commit r1675022

Modified:
    nutch/trunk/conf/nutch-default.xml

Modified: nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1675735&r1=1675734&r2=1675735&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Thu Apr 23 23:55:09 2015
@@ -118,6 +118,15 @@
 </property>
 
 <property>
+   <name>http.robot.rules.whitelist</name> 	 
+   <value></value> 	 
+   <description>Comma separated list of hostnames or IP addresses to ignore 	 
+   robot rules parsing for. Use with care and only if you are explicitly 	 
+   allowed by the site owner to ignore the site's robots.txt! 	 
+   </description> 	 
+</property> 	 
+   	   	   	   	   	   	 	  	 
+<property>
   <name>http.robots.403.allow</name>
   <value>true</value>
   <description>Some servers return HTTP status 403 (Forbidden) if