You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2012/02/23 08:19:21 UTC
svn commit: r1292679 - /nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
Author: ferdy
Date: Thu Feb 23 07:19:20 2012
New Revision: 1292679
URL: http://svn.apache.org/viewvc?rev=1292679&view=rev
Log:
integrate NUTCH-965 Skip parsing for truncated documents (commit 2)
Modified:
nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java?rev=1292679&r1=1292678&r2=1292679&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java Thu Feb 23 07:19:20 2012
@@ -606,13 +606,11 @@ extends GoraReducer<IntWritable, FetchEn
String key = TableUtil.reverseUrl(fit.url);
if (parse) {
- if (skipTruncated) {
- if (!ParserJob.isTruncated(fit.url, fit.page)) {
- URLWebPage redirectedPage = parseUtil.process(key, fit.page);
- if (redirectedPage != null) {
- context.write(TableUtil.reverseUrl(redirectedPage.getUrl()),
- redirectedPage.getDatum());
- }
+ if (!skipTruncated || (skipTruncated && !ParserJob.isTruncated(fit.url, fit.page))) {
+ URLWebPage redirectedPage = parseUtil.process(key, fit.page);
+ if (redirectedPage != null) {
+ context.write(TableUtil.reverseUrl(redirectedPage.getUrl()),
+ redirectedPage.getDatum());
}
}
}