You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2010/07/01 14:09:51 UTC
svn commit: r959629 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/MD5Signature.java
Author: ab
Date: Thu Jul 1 12:09:51 2010
New Revision: 959629
URL: http://svn.apache.org/viewvc?rev=959629&view=rev
Log:
NUTCH-835 Document deduplication failed using MD5Signature (Sebastian Nagel via ab)
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=959629&r1=959628&r2=959629&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jul 1 12:09:51 2010
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.0 - Current Development
+* NUTCH-835 Document deduplication failed using MD5Signature (Sebastian Nagel via ab)
+
* NUTCH-278 Fetcher-status might need clarification: kbit/s instead of kb/s shown (Alex McLintock via mattmann)
* NUTCH-833 Website is still Lucene branded (mattmann, Alex McLintock)
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java?rev=959629&r1=959628&r2=959629&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java Thu Jul 1 12:09:51 2010
@@ -33,7 +33,6 @@ public class MD5Signature extends Signat
public byte[] calculate(Content content, Parse parse) {
byte[] data = content.getContent();
if (data == null) data = content.getUrl().getBytes();
- StringBuilder buf = new StringBuilder().append(data).append(parse.getText());
- return MD5Hash.digest(buf.toString().getBytes()).getDigest();
+ return MD5Hash.digest(data).getDigest();
}
}