You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/06/09 00:16:43 UTC

svn commit: r412866 - /lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java

Author: ab
Date: Thu Jun  8 15:16:42 2006
New Revision: 412866

URL: http://svn.apache.org/viewvc?rev=412866&view=rev
Log:
Fix an accidental omission - somehow this part of the scoring patch
was never applied...

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=412866&r1=412865&r2=412866&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Thu Jun  8 15:16:42 2006
@@ -30,6 +30,8 @@
 
 import org.apache.nutch.net.URLFilterException;
 import org.apache.nutch.net.URLFilters;
+import org.apache.nutch.scoring.ScoringFilterException;
+import org.apache.nutch.scoring.ScoringFilters;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
 
@@ -68,6 +70,7 @@
     private int maxPerHost;
     private Partitioner hostPartitioner = new PartitionUrlByHost();
     private URLFilters filters;
+    private ScoringFilters scfilters;
     private SelectorEntry entry = new SelectorEntry();
     private FloatWritable sortValue = new FloatWritable();
     private boolean byIP;
@@ -79,6 +82,7 @@
       maxPerHost = job.getInt("generate.max.per.host", -1);
       byIP = job.getBoolean("generate.max.per.host.by.ip", false);
       filters = new URLFilters(job);
+      scfilters = new ScoringFilters(job);
     }
 
     public void close() {}
@@ -103,8 +107,14 @@
       if (crawlDatum.getFetchTime() > curTime)
         return;                                   // not time yet
 
+      float sort = 1.0f;
+      try {
+        sort = scfilters.generatorSortValue((UTF8)key, crawlDatum, sort);
+      } catch (ScoringFilterException sfe) {
+        LOG.warning("Couldn't filter generatorSortValue for " + key + ": " + sfe);
+      }
       // sort by decreasing score
-      sortValue.set(crawlDatum.getScore());
+      sortValue.set(sort);
       entry.datum = crawlDatum;
       entry.url = (UTF8)key;
       output.collect(sortValue, entry);          // invert for sort by score