You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2012/07/09 17:22:31 UTC

svn commit: r1359215 - in /nutch/branches/nutchgora: CHANGES.txt src/java/org/apache/nutch/fetcher/FetcherReducer.java

Author: ferdy
Date: Mon Jul  9 15:22:31 2012
New Revision: 1359215

URL: http://svn.apache.org/viewvc?rev=1359215&view=rev
Log:
NUTCH-1411 nutchgora fetcher.store.content does not work

Modified:
    nutch/branches/nutchgora/CHANGES.txt
    nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1359215&r1=1359214&r2=1359215&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon Jul  9 15:22:31 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.1 - Current Development
 
+* NUTCH-1411 nutchgora fetcher.store.content does not work (Alexander Kingson via ferdy) 
+
 * NUTCH-1426 HostDb close() should close store instead of flush (ferdy)
 
 * NUTCH-1425 DbUpdaterJob declares PREV_SIGNATURE on input twice (ferdy)

Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java?rev=1359215&r1=1359214&r2=1359215&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java Mon Jul  9 15:22:31 2012
@@ -81,6 +81,7 @@ extends GoraReducer<IntWritable, FetchEn
 
   private FetchItemQueues fetchQueues;
 
+  private boolean storingContent;
   private boolean parse;
 
   private ParseUtil parseUtil;
@@ -665,6 +666,11 @@ extends GoraReducer<IntWritable, FetchEn
           }
         }
       }
+      // Remove content if storingContent is false. Content is added to fit.page above
+      // so that ParseUtil is able to parse it.
+      if(content != null && !storingContent){
+        fit.page.setContent(ByteBuffer.wrap(new byte[0]));
+      }
       context.write(key, fit.page);
     }
 
@@ -782,6 +788,7 @@ extends GoraReducer<IntWritable, FetchEn
     this.fetchQueues = new FetchItemQueues(conf);
     int threadCount = conf.getInt("fetcher.threads.fetch", 10);
     parse = conf.getBoolean(FetcherJob.PARSE_KEY, false);
+    storingContent=conf.getBoolean("fetcher.store.content", true);
     if (parse) {
       skipTruncated=conf.getBoolean(ParserJob.SKIP_TRUNCATED, true);
       parseUtil = new ParseUtil(conf);