You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2013/06/13 14:10:38 UTC

svn commit: r1492639 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java src/java/org/apache/nutch/tools/FreeGenerator.java

Author: markus
Date: Thu Jun 13 12:10:37 2013
New Revision: 1492639

URL: http://svn.apache.org/r1492639
Log:
NUTCH-1430 FreeGenerator records overwrite CrawlDB records with AdaptiveFetchSchedule

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
    nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1492639&r1=1492638&r2=1492639&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jun 13 12:10:37 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk): Current Development
 
+* NUTCH-1430 Freegenerator records overwrite CrawlDB records with AdaptiveFetchSchedule (markus)
+
 * NUTCH-1522 Upgrade to Tika 1.3 (jnioche)
 
 * NUTCH-1578 Upgrade to Hadoop 1.2.0 (markus)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java?rev=1492639&r1=1492638&r2=1492639&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java Thu Jun 13 12:10:37 2013
@@ -94,6 +94,9 @@ public class AdaptiveFetchSchedule exten
     float interval = datum.getFetchInterval();
     long refTime = fetchTime;
 
+    // https://issues.apache.org/jira/browse/NUTCH-1430
+    interval = (interval == 0) ? defaultInterval : interval;
+
     if (datum.getMetaData().containsKey(Nutch.WRITABLE_FIXED_INTERVAL_KEY)) {
       // Is fetch interval preset in CrawlDatum MD? Then use preset interval
       FloatWritable customIntervalWritable= (FloatWritable)(datum.getMetaData().get(Nutch.WRITABLE_FIXED_INTERVAL_KEY));

Modified: nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java?rev=1492639&r1=1492638&r2=1492639&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java Thu Jun 13 12:10:37 2013
@@ -72,10 +72,12 @@ public class FreeGenerator extends Confi
     private ScoringFilters scfilters;
     private CrawlDatum datum = new CrawlDatum();
     private Text url = new Text();
+    private int defaultInterval = 0;
 
     @Override
     public void configure(JobConf job) {
       super.configure(job);
+      defaultInterval = job.getInt("db.fetch.interval.default", 0);
       scfilters = new ScoringFilters(job);
       if (job.getBoolean(FILTER_KEY, false)) {
         filters = new URLFilters(job);
@@ -84,7 +86,7 @@ public class FreeGenerator extends Confi
         normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
       }
     }
-    
+
     Generator.SelectorEntry entry = new Generator.SelectorEntry();
 
     public void map(WritableComparable<?> key, Text value, OutputCollector<Text,
@@ -114,6 +116,8 @@ public class FreeGenerator extends Confi
       }
       entry.datum = datum;
       entry.url = url;
+      // https://issues.apache.org/jira/browse/NUTCH-1430
+      entry.datum.setFetchInterval(defaultInterval);
       output.collect(url, entry);
     }
 
@@ -131,7 +135,7 @@ public class FreeGenerator extends Confi
       }
     }
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
       System.err.println("Usage: FreeGenerator <inputDir> <segmentsDir> [-filter] [-normalize]");
@@ -156,7 +160,7 @@ public class FreeGenerator extends Confi
         }
       }
     }
-    
+
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("FreeGenerator: starting at " + sdf.format(start));