You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by si...@apache.org on 2009/02/24 11:09:36 UTC

svn commit: r747324 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/CrawlDatum.java src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Author: siren
Date: Tue Feb 24 10:09:36 2009
New Revision: 747324

URL: http://svn.apache.org/viewvc?rev=747324&view=rev
Log:
NUTCH-698 - CrawlDb is corrupted after a few crawl cycles, contributed by dogacan

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=747324&r1=747323&r2=747324&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Feb 24 10:09:36 2009
@@ -359,6 +359,9 @@
 
 134. NUTCH-247 - Robot parser to restrict (kubes, siren)
 
+135. NUTCH-698 - CrawlDb is corrupted after a few crawl cycles (dogacan
+     via siren)
+
 Release 0.9 - 2007-04-02
 
  1. Changed log4j confiquration to log to stdout on commandline

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=747324&r1=747323&r2=747324&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Tue Feb 24 10:09:36 2009
@@ -204,7 +204,17 @@
   }
   
    public void setMetaData(org.apache.hadoop.io.MapWritable mapWritable) {
-     this.metaData = mapWritable;
+     this.metaData = new org.apache.hadoop.io.MapWritable(mapWritable);
+   }
+   
+   /** Add all metadata from other CrawlDatum to this CrawlDatum.
+    * 
+    * @param other CrawlDatum
+    */
+   public void putAllMetaData(CrawlDatum other) {
+     for (Entry<Writable, Writable> e : other.getMetaData().entrySet()) {
+       metaData.put(e.getKey(), e.getValue());
+     }
    }
 
   /**

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=747324&r1=747323&r2=747324&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Tue Feb 24 10:09:36 2009
@@ -131,10 +131,10 @@
     if (oldSet) {
       // copy metadata from old, if exists
       if (old.getMetaData().size() > 0) {
-        result.getMetaData().putAll(old.getMetaData());
+        result.putAllMetaData(old);
         // overlay with new, if any
         if (fetch.getMetaData().size() > 0)
-          result.getMetaData().putAll(fetch.getMetaData());
+          result.putAllMetaData(fetch);
       }
       // set the most recent valid value of modifiedTime
       if (old.getModifiedTime() > 0 && fetch.getModifiedTime() == 0) {