You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/06/09 00:16:43 UTC
svn commit: r412866 -
/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
Author: ab
Date: Thu Jun 8 15:16:42 2006
New Revision: 412866
URL: http://svn.apache.org/viewvc?rev=412866&view=rev
Log:
Fix an accidental omission - somehow this part of the scoring patch
was never applied...
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=412866&r1=412865&r2=412866&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Thu Jun 8 15:16:42 2006
@@ -30,6 +30,8 @@
import org.apache.nutch.net.URLFilterException;
import org.apache.nutch.net.URLFilters;
+import org.apache.nutch.scoring.ScoringFilterException;
+import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
@@ -68,6 +70,7 @@
private int maxPerHost;
private Partitioner hostPartitioner = new PartitionUrlByHost();
private URLFilters filters;
+ private ScoringFilters scfilters;
private SelectorEntry entry = new SelectorEntry();
private FloatWritable sortValue = new FloatWritable();
private boolean byIP;
@@ -79,6 +82,7 @@
maxPerHost = job.getInt("generate.max.per.host", -1);
byIP = job.getBoolean("generate.max.per.host.by.ip", false);
filters = new URLFilters(job);
+ scfilters = new ScoringFilters(job);
}
public void close() {}
@@ -103,8 +107,14 @@
if (crawlDatum.getFetchTime() > curTime)
return; // not time yet
+ float sort = 1.0f;
+ try {
+ sort = scfilters.generatorSortValue((UTF8)key, crawlDatum, sort);
+ } catch (ScoringFilterException sfe) {
+ LOG.warning("Couldn't filter generatorSortValue for " + key + ": " + sfe);
+ }
// sort by decreasing score
- sortValue.set(crawlDatum.getScore());
+ sortValue.set(sort);
entry.datum = crawlDatum;
entry.url = (UTF8)key;
output.collect(sortValue, entry); // invert for sort by score