You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/07/26 22:02:41 UTC

svn commit: r225382 - /lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java

Author: cutting
Date: Tue Jul 26 13:02:35 2005
New Revision: 225382

URL: http://svn.apache.org/viewcvs?rev=225382&view=rev
Log:
Improve the hash function used when sorting fetchlists by URL hash so that
it de-emphasizes the host, scattering hosts more evenly through fetchlists.

Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java?rev=225382&r1=225381&r2=225382&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Generator.java Tue Jul 26 13:02:35 2005
@@ -101,7 +101,9 @@
 
     private static int hash(byte[] bytes, int start, int length) {
       int hash = 1;
-      for (int i = 0; i < length; i++)
+      // make later bytes more significant in hash code, so that sorting by
+      // hashcode correlates less with by-host ordering.
+      for (int i = length-1; i >= 0; i--)
         hash = (31 * hash) + (int)bytes[start+i];
       return hash;
     }