You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/08/17 18:35:37 UTC

svn commit: r432287 - /lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Generator.java

Author: ab
Date: Thu Aug 17 09:35:35 2006
New Revision: 432287

URL: http://svn.apache.org/viewvc?rev=432287&view=rev
Log:
Apply patch in NUTCH-348 - Generator used the lowest score instead of
the highest. Contributed by Chris Schneider and Stefan Groschupf.

Modified:
    lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Generator.java

Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Generator.java?rev=432287&r1=432286&r2=432287&view=diff
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Generator.java Thu Aug 17 09:35:35 2006
@@ -59,7 +59,11 @@
     public void write(DataOutput out) throws IOException {
       url.write(out);
       datum.write(out);
-    }    
+    }
+    
+    public String toString() { // debugging aid: renders the url and datum fields
+      return new StringBuffer("url=").append(url.toString()).append(", datum=").append(datum.toString()).toString();
+    }
   }
 
   /** Selects entries due for fetch. */
@@ -118,7 +122,7 @@
           LOG.warn("Couldn't filter generatorSortValue for " + key + ": " + sfe);
         }
       }
-      // sort by decreasing score
+      // sort by decreasing score, using DecreasingFloatComparator
       sortValue.set(sort);
       entry.datum = crawlDatum;
       entry.url = (UTF8)key;
@@ -196,6 +200,20 @@
 
   }
 
+  /** Sorts FloatWritable keys from the highest value down to the lowest. */
+  public static class DecreasingFloatComparator extends WritableComparator {
+    public DecreasingFloatComparator() {
+      super(FloatWritable.class);
+    }
+
+    /** Compares two FloatWritables decreasing. Float.compare keeps the order
+     *  total and antisymmetric even for NaN, which it ranks above all values. */
+    public int compare(WritableComparable o1, WritableComparable o2) {
+      // operands are swapped so that the larger value sorts first
+      return Float.compare(((FloatWritable) o2).get(), ((FloatWritable) o1).get());
+    }
+  }
+  
   public static class SelectorInverseMapper extends MapReduceBase implements Mapper {
 
     public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter) throws IOException {
@@ -270,7 +288,7 @@
     if (LOG.isInfoEnabled()) {
       LOG.info("Generator: starting");
       LOG.info("Generator: segment: " + segment);
-      LOG.info("Generator: Selecting most-linked urls due for fetch.");
+      LOG.info("Generator: Selecting best-scoring urls due for fetch.");
     }
 
     // map to inverted subset due for fetch, sort by link count
@@ -296,6 +314,7 @@
     job.setOutputPath(tempDir);
     job.setOutputFormat(SequenceFileOutputFormat.class);
     job.setOutputKeyClass(FloatWritable.class);
+    job.setOutputKeyComparatorClass(DecreasingFloatComparator.class);
     job.setOutputValueClass(SelectorEntry.class);
     JobClient.runJob(job);