You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2012/02/23 08:19:21 UTC

svn commit: r1292679 - /nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java

Author: ferdy
Date: Thu Feb 23 07:19:20 2012
New Revision: 1292679

URL: http://svn.apache.org/viewvc?rev=1292679&view=rev
Log:
integrate NUTCH-965 Skip parsing for truncated documents (commit 2)

Modified:
    nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java

Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java?rev=1292679&r1=1292678&r2=1292679&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java Thu Feb 23 07:19:20 2012
@@ -606,13 +606,11 @@ extends GoraReducer<IntWritable, FetchEn
       String key = TableUtil.reverseUrl(fit.url);
 
       if (parse) {
-        if (skipTruncated) {
-          if (!ParserJob.isTruncated(fit.url, fit.page)) {
-            URLWebPage redirectedPage = parseUtil.process(key, fit.page);
-            if (redirectedPage != null) {
-              context.write(TableUtil.reverseUrl(redirectedPage.getUrl()),
-                            redirectedPage.getDatum());
-            }
+        if (!skipTruncated || (skipTruncated && !ParserJob.isTruncated(fit.url, fit.page))) {
+          URLWebPage redirectedPage = parseUtil.process(key, fit.page);
+          if (redirectedPage != null) {
+            context.write(TableUtil.reverseUrl(redirectedPage.getUrl()),
+                redirectedPage.getDatum());
           }
         }
       }