You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/08/17 16:56:35 UTC
svn commit: r432256 -
/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
Author: ab
Date: Thu Aug 17 07:56:35 2006
New Revision: 432256
URL: http://svn.apache.org/viewvc?rev=432256&view=rev
Log:
Apply patch in NUTCH-348 - Generator used the lowest score instead of
the highest. Contributed by Chris Schneider and Stefan Groschupf.
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=432256&r1=432255&r2=432256&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Thu Aug 17 07:56:35 2006
@@ -59,7 +59,11 @@
public void write(DataOutput out) throws IOException {
url.write(out);
datum.write(out);
- }
+ }
+
+ public String toString() { // renders both fields as "url=<url>, datum=<datum>"
+ return new StringBuilder("url=").append(url.toString()).append(", datum=").append(datum.toString()).toString();
+ }
}
/** Selects entries due for fetch. */
@@ -118,7 +122,7 @@
LOG.warn("Couldn't filter generatorSortValue for " + key + ": " + sfe);
}
}
- // sort by decreasing score
+ // sort by decreasing score, using DecreasingFloatComparator
sortValue.set(sort);
entry.datum = crawlDatum;
entry.url = (UTF8)key;
@@ -196,6 +200,20 @@
}
+ /** Orders FloatWritable keys from the highest value to the lowest. */
+ public static class DecreasingFloatComparator extends WritableComparator {
+ public DecreasingFloatComparator() {
+ super(FloatWritable.class);
+ }
+
+ /** Compares two FloatWritables in decreasing order; Float.compare keeps the ordering total even for NaN. */
+ public int compare(WritableComparable o1, WritableComparable o2) {
+ float thisValue = ((FloatWritable) o1).get();
+ float thatValue = ((FloatWritable) o2).get();
+ return Float.compare(thatValue, thisValue);
+ }
+ }
+
public static class SelectorInverseMapper extends MapReduceBase implements Mapper {
public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter) throws IOException {
@@ -270,7 +288,7 @@
if (LOG.isInfoEnabled()) {
LOG.info("Generator: starting");
LOG.info("Generator: segment: " + segment);
- LOG.info("Generator: Selecting most-linked urls due for fetch.");
+ LOG.info("Generator: Selecting best-scoring urls due for fetch.");
}
// map to inverted subset due for fetch, sort by link count
@@ -296,6 +314,7 @@
job.setOutputPath(tempDir);
job.setOutputFormat(SequenceFileOutputFormat.class);
job.setOutputKeyClass(FloatWritable.class);
+ job.setOutputKeyComparatorClass(DecreasingFloatComparator.class);
job.setOutputValueClass(SelectorEntry.class);
JobClient.runJob(job);