Posted to commits@nutch.apache.org by do...@apache.org on 2009/02/11 10:12:15 UTC

svn commit: r743277 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/CrawlDbMerger.java

Author: dogacan
Date: Wed Feb 11 09:12:15 2009
New Revision: 743277

URL: http://svn.apache.org/viewvc?rev=743277&view=rev
Log:
NUTCH-683 - NUTCH-676 broke CrawlDbMerger
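
The breakage appears to stem from the org.apache.hadoop.io.MapWritable that
NUTCH-676 moved CrawlDatum metadata onto: in Hadoop releases of this era,
MapWritable.putAll() copied entries into the backing map without registering
their key and value classes in the class-to-id table that MapWritable
serializes, so a map populated via putAll() could fail when written out.
Copying entry by entry through put() registers each class as it goes, which
is the pattern the diff below adopts. A minimal, self-contained sketch of
that copy pattern (the MetaCopy class and copyMeta method are illustrative,
not from the commit):

    import java.util.Map.Entry;

    import org.apache.hadoop.io.MapWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class MetaCopy {
      // Copy src into dst one entry at a time; MapWritable.put()
      // registers each key/value class, keeping serialization valid.
      static void copyMeta(MapWritable dst, MapWritable src) {
        for (Entry<Writable, Writable> e : src.entrySet()) {
          dst.put(e.getKey(), e.getValue());
        }
      }

      public static void main(String[] args) {
        MapWritable src = new MapWritable();
        src.put(new Text("_pst_"), new Text("success"));
        MapWritable dst = new MapWritable();
        copyMeta(dst, src);
        System.out.println(dst.get(new Text("_pst_")));  // prints "success"
      }
    }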

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=743277&r1=743276&r2=743277&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Feb 11 09:12:15 2009
@@ -338,6 +338,7 @@
 126. NUTCH-636 - Httpclient plugin https doesn't work on IBM JRE
      (Curtis d'Entremont, ab)
 
+127. NUTCH-683 - NUTCH-676 broke CrawlDbMerger. (dogacan)
      
 Release 0.9 - 2007-04-02
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java?rev=743277&r1=743276&r2=743277&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbMerger.java Wed Feb 11 09:12:15 2009
@@ -19,6 +19,7 @@
 
 import java.io.IOException;
 import java.util.*;
+import java.util.Map.Entry;
 
 // Commons Logging imports
 import org.apache.commons.logging.Log;
@@ -28,6 +29,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.*;
 import org.apache.hadoop.util.*;
 import org.apache.hadoop.conf.*;
@@ -53,7 +55,7 @@
   private static final Log LOG = LogFactory.getLog(CrawlDbMerger.class);
 
   public static class Merger extends MapReduceBase implements Reducer<Text, CrawlDatum, Text, CrawlDatum> {
-    private org.apache.hadoop.io.MapWritable meta = new org.apache.hadoop.io.MapWritable();
+    private org.apache.hadoop.io.MapWritable meta;
     private CrawlDatum res = new CrawlDatum();
     private FetchSchedule schedule;
 
@@ -67,26 +69,32 @@
             throws IOException {
       long resTime = 0L;
       boolean resSet = false;
-      meta.clear();
+      meta = new org.apache.hadoop.io.MapWritable();
       while (values.hasNext()) {
         CrawlDatum val = values.next();
         if (!resSet) {
           res.set(val);
           resSet = true;
           resTime = schedule.calculateLastFetchTime(res);
-          meta.putAll(res.getMetaData());
+          for (Entry<Writable, Writable> e : res.getMetaData().entrySet()) {
+            meta.put(e.getKey(), e.getValue());
+          }
           continue;
         }
         // compute last fetch time, and pick the latest
         long valTime = schedule.calculateLastFetchTime(val);
         if (valTime > resTime) {
           // collect all metadata, newer values override older values
-          meta.putAll(val.getMetaData());
+          for (Entry<Writable, Writable> e : val.getMetaData().entrySet()) {
+            meta.put(e.getKey(), e.getValue());
+          }
           res.set(val);
           resTime = valTime ;
         } else {
           // insert older metadata before newer
-          val.getMetaData().putAll(meta);
+          for (Entry<Writable, Writable> e : meta.entrySet()) {
+            val.getMetaData().put(e.getKey(), e.getValue());
+          }
           meta = val.getMetaData();
         }
       }
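
The second change in the hunk, allocating a fresh MapWritable per reduce()
call instead of reusing one field and calling clear(), sidesteps an aliasing
hazard: in the else branch, meta is reassigned to val.getMetaData(), so the
shared field can end up pointing at a CrawlDatum's own metadata map, which
the next call's clear() would then wipe (assuming getMetaData() returns the
live map rather than a copy, as it did at the time). A minimal demo of that
hazard (the AliasDemo class is illustrative, not from the commit):

    import org.apache.hadoop.io.MapWritable;
    import org.apache.hadoop.io.Text;

    public class AliasDemo {
      public static void main(String[] args) {
        MapWritable ownedByDatum = new MapWritable();
        ownedByDatum.put(new Text("k"), new Text("v"));

        MapWritable meta = ownedByDatum;  // like "meta = val.getMetaData()"
        meta.clear();                     // like the old per-call reset
        System.out.println(ownedByDatum.size());  // prints 0: datum's map wiped
      }
    }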