You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2010/07/01 14:09:51 UTC

svn commit: r959629 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/MD5Signature.java

Author: ab
Date: Thu Jul  1 12:09:51 2010
New Revision: 959629

URL: http://svn.apache.org/viewvc?rev=959629&view=rev
Log:
NUTCH-835 Document deduplication failed using MD5Signature (Sebastian Nagel via ab)

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=959629&r1=959628&r2=959629&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jul  1 12:09:51 2010
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.0 - Current Development
 
+* NUTCH-835 Document deduplication failed using MD5Signature (Sebastian Nagel via ab)
+
 * NUTCH-278 Fetcher-status might need clarification: kbit/s instead of kb/s shown (Alex McLintock via mattmann)
 
 * NUTCH-833 Website is still Lucene branded (mattmann, Alex McLintock)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java?rev=959629&r1=959628&r2=959629&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java Thu Jul  1 12:09:51 2010
@@ -33,7 +33,6 @@ public class MD5Signature extends Signat
   public byte[] calculate(Content content, Parse parse) {
     byte[] data = content.getContent();
     if (data == null) data = content.getUrl().getBytes();
-    StringBuilder buf = new StringBuilder().append(data).append(parse.getText());
-    return MD5Hash.digest(buf.toString().getBytes()).getDigest();
+    return MD5Hash.digest(data).getDigest();
   }
 }