You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to commits@nutch.apache.org by ab...@apache.org on 2007/03/22 11:08:01 UTC
svn commit: r521182 - in /lucene/nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/crawl/Injector.java
Author: ab
Date: Thu Mar 22 03:08:00 2007
New Revision: 521182
URL: http://svn.apache.org/viewvc?view=rev&rev=521182
Log:
NUTCH-246 - incorrect segment size being generated due to time
synchronization issue.
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=521182&r1=521181&r2=521182
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Thu Mar 22 03:08:00 2007
@@ -166,6 +166,9 @@
56. Upgrade to Hadoop 0.12.1 release. (ab)
+57. NUTCH-246 - Incorrect segment size being generated due to time
+ synchronization issue (Stefan Groschupf via ab)
+
Release 0.8 - 2006-07-25
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?view=diff&rev=521182&r1=521181&r2=521182
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Thu Mar 22 03:08:00 2007
@@ -51,6 +51,7 @@
private JobConf jobConf;
private URLFilters filters;
private ScoringFilters scfilters;
+ private long curTime;
public void configure(JobConf job) {
this.jobConf = job;
@@ -59,6 +60,7 @@
filters = new URLFilters(jobConf);
scfilters = new ScoringFilters(jobConf);
scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
+ curTime = job.getLong("injector.current.time", System.currentTimeMillis());
}
public void close() {}
@@ -79,6 +81,7 @@
if (url != null) { // if it passes
value.set(url); // collect it
CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_INJECTED, interval);
+ datum.setFetchTime(curTime);
datum.setScore(scoreInjected);
try {
scfilters.injectedScore(value, datum);
@@ -96,7 +99,7 @@
/** Combine multiple new entries for a url. */
public static class InjectReducer implements Reducer {
- public void configure(JobConf job) {}
+ public void configure(JobConf job) {}
public void close() {}
public void reduce(WritableComparable key, Iterator values,
@@ -155,6 +158,7 @@
sortJob.setOutputFormat(SequenceFileOutputFormat.class);
sortJob.setOutputKeyClass(Text.class);
sortJob.setOutputValueClass(CrawlDatum.class);
+ sortJob.setLong("injector.current.time", System.currentTimeMillis());
JobClient.runJob(sortJob);
// merge with existing crawl db