You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2010/07/01 14:22:39 UTC

svn commit: r959631 - in /nutch/branches/branch-1.2: CHANGES.txt src/java/org/apache/nutch/crawl/MD5Signature.java

Author: ab
Date: Thu Jul  1 12:22:39 2010
New Revision: 959631

URL: http://svn.apache.org/viewvc?rev=959631&view=rev
Log:
Merge from trunk rev. 959629.

Modified:
    nutch/branches/branch-1.2/CHANGES.txt
    nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java

Modified: nutch/branches/branch-1.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/CHANGES.txt?rev=959631&r1=959630&r2=959631&view=diff
==============================================================================
--- nutch/branches/branch-1.2/CHANGES.txt (original)
+++ nutch/branches/branch-1.2/CHANGES.txt Thu Jul  1 12:22:39 2010
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.2 - Current Development
 
+* NUTCH-835 Document deduplication failed using MD5Signature (Sebastian Nagel via ab)
+
 * NUTCH-831 Allow configuration of how fields crawled by Nutch are stored / indexed / 
   tokenized (Jeroen van Vianen via mattmann)
 

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java?rev=959631&r1=959630&r2=959631&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java Thu Jul  1 12:22:39 2010
@@ -33,7 +33,6 @@ public class MD5Signature extends Signat
   public byte[] calculate(Content content, Parse parse) {
     byte[] data = content.getContent();
     if (data == null) data = content.getUrl().getBytes();
-    StringBuilder buf = new StringBuilder().append(data).append(parse.getText());
-    return MD5Hash.digest(buf.toString().getBytes()).getDigest();
+    return MD5Hash.digest(data).getDigest();
   }
 }