You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2012/02/23 07:55:02 UTC
svn commit: r1292667 - /nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
Author: ferdy
Date: Thu Feb 23 06:55:02 2012
New Revision: 1292667
URL: http://svn.apache.org/viewvc?rev=1292667&view=rev
Log:
Integrate NUTCH-965: skip parsing for truncated documents (commit 2)
Modified:
nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
Modified: nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=1292667&r1=1292666&r2=1292667&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Thu Feb 23 06:55:02 2012
@@ -947,7 +947,7 @@ public class Fetcher extends Configured
/* Note: Fetcher will only follow meta-redirects coming from the
* original URL. */
if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
- if (!checkTruncated || (checkTruncated && ParseSegment.isTruncated(content))) {
+ if (!checkTruncated || (checkTruncated && !ParseSegment.isTruncated(content))) {
try {
parseResult = this.parseUtil.parse(content);
} catch (Exception e) {