You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2016/08/22 21:51:01 UTC
[3/5] nutch git commit: add hint and log warning that
fetcher.store.robotstxt works only in combination with fetcher.store.content
Add a hint to the property description and log a warning that fetcher.store.robotstxt works only in combination with fetcher.store.content.
Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/33cdca76
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/33cdca76
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/33cdca76
Branch: refs/heads/master
Commit: 33cdca76ac91a63445d4e761081e8124a23413af
Parents: 264eea0
Author: Sebastian Nagel <sn...@apache.org>
Authored: Fri Aug 19 15:32:34 2016 +0200
Committer: Sebastian Nagel <sn...@apache.org>
Committed: Fri Aug 19 15:32:34 2016 +0200
----------------------------------------------------------------------
conf/nutch-default.xml | 6 ++++--
src/java/org/apache/nutch/fetcher/FetcherThread.java | 6 +++++-
2 files changed, 9 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/nutch/blob/33cdca76/conf/nutch-default.xml
----------------------------------------------------------------------
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 8c329bc..ec9d2d4 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -985,8 +985,10 @@
<property>
<name>fetcher.store.robotstxt</name>
<value>false</value>
- <description>If true, fetcher will store the robots.txt response
- content and status for debugging or archival purposes.
+ <description>If true (and fetcher.store.content is also true),
+ fetcher will store the robots.txt response content and status for
+ debugging or archival purposes. The robots.txt is added to the
+ content/ folder of the fetched segment.
</description>
</property>
http://git-wip-us.apache.org/repos/asf/nutch/blob/33cdca76/src/java/org/apache/nutch/fetcher/FetcherThread.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java
index 6024b8d..449e220 100644
--- a/src/java/org/apache/nutch/fetcher/FetcherThread.java
+++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java
@@ -192,7 +192,11 @@ public class FetcherThread extends Thread {
outlinksDepthDivisor = conf.getInt(
"fetcher.follow.outlinks.depth.divisor", 2);
if (conf.getBoolean("fetcher.store.robotstxt", false)) {
- robotsTxtContent = new LinkedList<Content>();
+ if (storingContent) {
+ robotsTxtContent = new LinkedList<Content>();
+ } else {
+ LOG.warn("Ignoring fetcher.store.robotstxt because not storing content (fetcher.store.content)!");
+ }
}
}