You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2014/03/17 22:56:32 UTC

svn commit: r1578620 - in /nutch/branches/2.x: CHANGES.txt src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java

Author: snagel
Date: Mon Mar 17 21:56:32 2014
New Revision: 1578620

URL: http://svn.apache.org/r1578620
Log:
NUTCH-1671 indexchecker to add digest field

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1578620&r1=1578619&r2=1578620&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Mar 17 21:56:32 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1671 indexchecker to add digest field (snagel, lufeng)
+
 * NUTCH-1645 Junit Test Case for Adaptive Fetch Schedule class (Yasin Kılınç, lufeng, Sertac TURKEL via snagel)
 
 * NUTCH-1478 Parse-metatags and index-metadata plugin for Nutch 2.x series (kiran, Nguyen Manh Tien, Talat UYARER, Vangelis Karvounis via lewismc)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java?rev=1578620&r1=1578619&r2=1578620&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java Mon Mar 17 21:56:32 2014
@@ -37,6 +37,7 @@ import org.apache.nutch.protocol.Protoco
 import org.apache.nutch.protocol.ProtocolStatusUtils;
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.StringUtil;
 import org.apache.nutch.util.URLUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -122,6 +123,7 @@ public class IndexingFiltersChecker exte
     }
 
     NutchDocument doc = new NutchDocument();
+    doc.add("digest", StringUtil.toHexString(page.getSignature()));
 
     try {
       doc = indexers.filter(doc, url, page);