You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2016/08/22 21:51:01 UTC

[3/5] nutch git commit: add hint and log warning that fetcher.store.robotstxt works only in combination with fetcher.store.content

add hint and log warning that fetcher.store.robotstxt works only in combination with fetcher.store.content


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/33cdca76
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/33cdca76
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/33cdca76

Branch: refs/heads/master
Commit: 33cdca76ac91a63445d4e761081e8124a23413af
Parents: 264eea0
Author: Sebastian Nagel <sn...@apache.org>
Authored: Fri Aug 19 15:32:34 2016 +0200
Committer: Sebastian Nagel <sn...@apache.org>
Committed: Fri Aug 19 15:32:34 2016 +0200

----------------------------------------------------------------------
 conf/nutch-default.xml                               | 6 ++++--
 src/java/org/apache/nutch/fetcher/FetcherThread.java | 6 +++++-
 2 files changed, 9 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nutch/blob/33cdca76/conf/nutch-default.xml
----------------------------------------------------------------------
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 8c329bc..ec9d2d4 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -985,8 +985,10 @@
 <property>
   <name>fetcher.store.robotstxt</name>
   <value>false</value>
-  <description>If true, fetcher will store the robots.txt response
-  content and status for debugging or archival purposes.
+  <description>If true (and fetcher.store.content is also true),
+  fetcher will store the robots.txt response content and status for
+  debugging or archival purposes. The robots.txt is added to the
+  content/ folder of the fetched segment.
   </description>
 </property>
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/33cdca76/src/java/org/apache/nutch/fetcher/FetcherThread.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java
index 6024b8d..449e220 100644
--- a/src/java/org/apache/nutch/fetcher/FetcherThread.java
+++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java
@@ -192,7 +192,11 @@ public class FetcherThread extends Thread {
     outlinksDepthDivisor = conf.getInt(
         "fetcher.follow.outlinks.depth.divisor", 2);
     if (conf.getBoolean("fetcher.store.robotstxt", false)) {
-      robotsTxtContent = new LinkedList<Content>();
+      if (storingContent) {
+        robotsTxtContent = new LinkedList<Content>();
+      } else {
+        LOG.warn("Ignoring fetcher.store.robotstxt because not storing content (fetcher.store.content)!");
+      }
     }
   }