You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2012/07/09 17:22:31 UTC
svn commit: r1359215 - in /nutch/branches/nutchgora: CHANGES.txt
src/java/org/apache/nutch/fetcher/FetcherReducer.java
Author: ferdy
Date: Mon Jul 9 15:22:31 2012
New Revision: 1359215
URL: http://svn.apache.org/viewvc?rev=1359215&view=rev
Log:
NUTCH-1411 nutchgora fetcher.store.content does not work
Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
Modified: nutch/branches/nutchgora/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1359215&r1=1359214&r2=1359215&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon Jul 9 15:22:31 2012
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.1 - Current Development
+* NUTCH-1411 nutchgora fetcher.store.content does not work (Alexander Kingson via ferdy)
+
* NUTCH-1426 HostDb close() should close store instead of flush (ferdy)
* NUTCH-1425 DbUpdaterJob declares PREV_SIGNATURE on input twice (ferdy)
Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java?rev=1359215&r1=1359214&r2=1359215&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java Mon Jul 9 15:22:31 2012
@@ -81,6 +81,7 @@ extends GoraReducer<IntWritable, FetchEn
private FetchItemQueues fetchQueues;
+ private boolean storingContent;
private boolean parse;
private ParseUtil parseUtil;
@@ -665,6 +666,11 @@ extends GoraReducer<IntWritable, FetchEn
}
}
}
+ //remove content if storingContent is false. Content is added to fit.page above
+ //so that ParseUtil is able to parse it.
+ if(content != null && !storingContent){
+ fit.page.setContent(ByteBuffer.wrap(new byte[0]));
+ }
context.write(key, fit.page);
}
@@ -782,6 +788,7 @@ extends GoraReducer<IntWritable, FetchEn
this.fetchQueues = new FetchItemQueues(conf);
int threadCount = conf.getInt("fetcher.threads.fetch", 10);
parse = conf.getBoolean(FetcherJob.PARSE_KEY, false);
+ storingContent=conf.getBoolean("fetcher.store.content", true);
if (parse) {
skipTruncated=conf.getBoolean(ParserJob.SKIP_TRUNCATED, true);
parseUtil = new ParseUtil(conf);