You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2010/07/01 14:22:39 UTC
svn commit: r959631 - in /nutch/branches/branch-1.2: CHANGES.txt
src/java/org/apache/nutch/crawl/MD5Signature.java
Author: ab
Date: Thu Jul 1 12:22:39 2010
New Revision: 959631
URL: http://svn.apache.org/viewvc?rev=959631&view=rev
Log:
Merge from trunk rev. 959629.
Modified:
nutch/branches/branch-1.2/CHANGES.txt
nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java
Modified: nutch/branches/branch-1.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/CHANGES.txt?rev=959631&r1=959630&r2=959631&view=diff
==============================================================================
--- nutch/branches/branch-1.2/CHANGES.txt (original)
+++ nutch/branches/branch-1.2/CHANGES.txt Thu Jul 1 12:22:39 2010
@@ -2,6 +2,8 @@ Nutch Change Log
Release 1.2 - Current Development
+* NUTCH-835 Document deduplication failed using MD5Signature (Sebastian Nagel via ab)
+
* NUTCH-831 Allow configuration of how fields crawled by Nutch are stored / indexed /
tokenized (Jeroen van Vianen via mattmann)
Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java?rev=959631&r1=959630&r2=959631&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/MD5Signature.java Thu Jul 1 12:22:39 2010
@@ -33,7 +33,6 @@ public class MD5Signature extends Signat
public byte[] calculate(Content content, Parse parse) {
byte[] data = content.getContent();
if (data == null) data = content.getUrl().getBytes();
- StringBuilder buf = new StringBuilder().append(data).append(parse.getText());
- return MD5Hash.digest(buf.toString().getBytes()).getDigest();
+ return MD5Hash.digest(data).getDigest();
}
}