You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/04/26 12:54:58 UTC

svn commit: r397169 - in /lucene/nutch/trunk/src/java/org/apache/nutch/crawl: CrawlDatum.java CrawlDbReducer.java

Author: ab
Date: Wed Apr 26 03:54:53 2006
New Revision: 397169

URL: http://svn.apache.org/viewcvs?rev=397169&view=rev
Log:
Don't allow CrawlDatum.getMetaData() to return null. The underlying
MapWritable is lazily instantiated to minimize the number of
created objects.

Refactor CrawlDbReducer to use this assumption.

Add missing statements in CrawlDatum.equals() and CrawlDatum.hashCode()
that deal with metaData.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=397169&r1=397168&r2=397169&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Wed Apr 26 03:54:53 2006
@@ -121,11 +121,11 @@
    public void setMetaData(MapWritable mapWritable) {this.metaData = mapWritable; }
 
   /**
-   * returns a MapWritable if it was set or read @see readFields(DataInput), 
-   * returns null in case CrawlDatum was freshly generated or an empty map 
-   * in case CrawlDatum is a recycled instance.
+   * Returns the MapWritable that was set or read in {@link #readFields(DataInput)};
+   * returns an empty map if the CrawlDatum was freshly created (lazily instantiated).
    */
   public MapWritable getMetaData() {
+    if (this.metaData == null) this.metaData = new MapWritable();
     return this.metaData;
   }
   
@@ -291,6 +291,7 @@
     buf.append("Retry interval: " + getFetchInterval() + " days\n");
     buf.append("Score: " + getScore() + "\n");
     buf.append("Signature: " + StringUtil.toHexString(getSignature()) + "\n");
+    buf.append("Metadata: " + (metaData != null ? metaData.toString() : "null") + "\n");
     return buf.toString();
   }
 
@@ -298,7 +299,7 @@
     if (!(o instanceof CrawlDatum))
       return false;
     CrawlDatum other = (CrawlDatum)o;
-    return
+    boolean res =
       (this.status == other.status) &&
       (this.fetchTime == other.fetchTime) &&
       (this.modifiedTime == other.modifiedTime) &&
@@ -306,6 +307,19 @@
       (this.fetchInterval == other.fetchInterval) &&
       (SignatureComparator._compare(this.signature, other.signature) == 0) &&
       (this.score == other.score);
+    if (!res) return res;
+    // allow zero-sized metadata to be equal to null metadata
+    if (this.metaData == null) {
+      if (other.metaData != null && other.metaData.size() > 0) return false;
+      else return true;
+    } else {
+      if (other.metaData == null) {
+        if (this.metaData.size() == 0) return true;
+        else return false;
+      } else {
+        return this.metaData.equals(other.metaData);
+      }
+    }
   }
 
   public int hashCode() {
@@ -316,6 +330,7 @@
                 signature[i+2] << 8 + signature[i+3]);
       }
     }
+    if (metaData != null) res ^= metaData.hashCode();
     return
       res ^ status ^
       ((int)fetchTime) ^

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=397169&r1=397168&r2=397169&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Wed Apr 26 03:54:53 2006
@@ -67,11 +67,10 @@
     result.set(highest);
     if (old != null) {
       // copy metadata from old, if exists
-      if (old.getMetaData() != null) {
-        if (result.getMetaData() == null) result.setMetaData(new MapWritable());
+      if (old.getMetaData().size() > 0) {
         result.getMetaData().putAll(old.getMetaData());
         // overlay with new, if any
-        if (highest.getMetaData() != null)
+        if (highest.getMetaData().size() > 0)
           result.getMetaData().putAll(highest.getMetaData());
       }
       // set the most recent valid value of modifiedTime