You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2007/03/22 11:08:01 UTC

svn commit: r521182 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/Injector.java

Author: ab
Date: Thu Mar 22 03:08:00 2007
New Revision: 521182

URL: http://svn.apache.org/viewvc?view=rev&rev=521182
Log:
NUTCH-246 - incorrect segment size being generated due to time
synchronization issue.

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=521182&r1=521181&r2=521182
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Thu Mar 22 03:08:00 2007
@@ -166,6 +166,9 @@
 
 56. Upgrade to Hadoop 0.12.1 release. (ab)
 
+57. NUTCH-246 - Incorrect segment size being generated due to time
+    synchronization issue (Stefan Groschupf via ab)
+
 	
 Release 0.8 - 2006-07-25
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?view=diff&rev=521182&r1=521181&r2=521182
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Thu Mar 22 03:08:00 2007
@@ -51,6 +51,7 @@
     private JobConf jobConf;
     private URLFilters filters;
     private ScoringFilters scfilters;
+    private long curTime;
 
     public void configure(JobConf job) {
       this.jobConf = job;
@@ -59,6 +60,7 @@
       filters = new URLFilters(jobConf);
       scfilters = new ScoringFilters(jobConf);
       scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
+      curTime = job.getLong("injector.current.time", System.currentTimeMillis());
     }
 
     public void close() {}
@@ -79,6 +81,7 @@
       if (url != null) {                          // if it passes
         value.set(url);                           // collect it
         CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_INJECTED, interval);
+        datum.setFetchTime(curTime);
         datum.setScore(scoreInjected);
         try {
           scfilters.injectedScore(value, datum);
@@ -96,7 +99,7 @@
 
   /** Combine multiple new entries for a url. */
   public static class InjectReducer implements Reducer {
-    public void configure(JobConf job) {}
+    public void configure(JobConf job) {}    
     public void close() {}
 
     public void reduce(WritableComparable key, Iterator values,
@@ -155,6 +158,7 @@
     sortJob.setOutputFormat(SequenceFileOutputFormat.class);
     sortJob.setOutputKeyClass(Text.class);
     sortJob.setOutputValueClass(CrawlDatum.class);
+    sortJob.setLong("injector.current.time", System.currentTimeMillis());
     JobClient.runJob(sortJob);
 
     // merge with existing crawl db