You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/08/24 06:14:20 UTC

svn commit: r239523 - /lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java

Author: cutting
Date: Tue Aug 23 21:14:18 2005
New Revision: 239523

URL: http://svn.apache.org/viewcvs?rev=239523&view=rev
Log:
Limit the number of inlinks collected per key in reduce (configurable via db.max.inlinks, default 100000) to avoid out-of-memory exceptions.

Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java?rev=239523&r1=239522&r2=239523&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java Tue Aug 23 21:14:18 2005
@@ -36,6 +36,7 @@
   public static String CURRENT_NAME = "current";
 
   private int maxAnchorLength;
+  private int maxInlinks;
 
   public LinkDb() {
     super(null);
@@ -48,6 +49,7 @@
 
   public void configure(JobConf job) {
     maxAnchorLength = job.getInt("db.max.anchor.length", 100);
+    maxInlinks = job.getInt("db.max.inlinks", 100000);
   }
 
   public void map(WritableComparable key, Writable value,
@@ -72,13 +74,12 @@
   public void reduce(WritableComparable key, Iterator values,
                      OutputCollector output, Reporter reporter)
     throws IOException {
-    Inlinks result = null;
+    Inlinks result = new Inlinks();
     while (values.hasNext()) {
       Inlinks inlinks = (Inlinks)values.next();
-      if (result == null) {
-        result = inlinks;
-      } else {
-        result.add(inlinks);
+      int end = Math.min(maxInlinks - result.size(), inlinks.size());
+      for (int i = 0; i < end; i++) {
+        result.add(inlinks.get(i));
       }
     }
     output.collect(key, result);