You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/04/06 12:49:44 UTC

svn commit: r391958 - in /lucene/nutch/trunk: conf/nutch-default.xml src/java/org/apache/nutch/parse/ParseData.java src/test/org/apache/nutch/parse/TestParseData.java src/test/org/apache/nutch/util/WritableTestUtils.java

Author: jerome
Date: Thu Apr  6 03:49:40 2006
New Revision: 391958

URL: http://svn.apache.org/viewcvs?rev=391958&view=rev
Log:
NUTCH-244: db.max.outlinks.per.page can now be set to a negative value to remove the limit on the number of outlinks processed per page

Modified:
    lucene/nutch/trunk/conf/nutch-default.xml
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
    lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java

Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=391958&r1=391957&r2=391958&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Thu Apr  6 03:49:40 2006
@@ -255,6 +255,8 @@
   <name>db.max.outlinks.per.page</name>
   <value>100</value>
   <description>The maximum number of outlinks that we'll process for a page.
+  If this value is nonnegative (>=0), at most db.max.outlinks.per.page outlinks
+  will be processed for a page; otherwise, all outlinks will be processed.
   </description>
 </property>
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java?rev=391958&r1=391957&r2=391958&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java Thu Apr  6 03:49:40 2006
@@ -119,12 +119,15 @@
 
     int totalOutlinks = in.readInt();             // read outlinks
     int maxOutlinksPerPage = this.conf.getInt("db.max.outlinks.per.page", 100);
-    int outlinksToRead = Math.min(maxOutlinksPerPage, totalOutlinks);
+    int outlinksToRead = totalOutlinks;
+    if (maxOutlinksPerPage >= 0) {
+      outlinksToRead = Math.min(maxOutlinksPerPage, totalOutlinks);
+    }
     outlinks = new Outlink[outlinksToRead];
     for (int i = 0; i < outlinksToRead; i++) {
       outlinks[i] = Outlink.read(in);
     }
-    for (int i = maxOutlinksPerPage; i < totalOutlinks; i++) {
+    for (int i = outlinksToRead; i < totalOutlinks; i++) {
       Outlink.skip(in);
     }
     

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java?rev=391958&r1=391957&r2=391958&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java Thu Apr  6 03:49:40 2006
@@ -51,4 +51,31 @@
     WritableTestUtils.testWritable(r, conf);
   }
 	
+  public void testMaxOutlinks() throws Exception {
+    Outlink[] outlinks = new Outlink[128];
+    for (int i=0; i<outlinks.length; i++) {
+      outlinks[i] = new Outlink("http://outlink.com/" + i, "Outlink" + i, conf);
+    }
+    ParseData original = new ParseData(ParseStatus.STATUS_SUCCESS,
+                                       "Max Outlinks Title",
+                                       outlinks,
+                                       new Metadata());
+    Configuration conf = NutchConfiguration.create();
+    // No Outlinks
+    conf.setInt("db.max.outlinks.per.page", 0);
+    ParseData data = (ParseData) WritableTestUtils.writeRead(original, conf);
+    assertEquals(0, data.getOutlinks().length);
+    // Only 100 Outlinks
+    conf.setInt("db.max.outlinks.per.page", 100);
+    data = (ParseData) WritableTestUtils.writeRead(original, conf);
+    assertEquals(100, data.getOutlinks().length);
+    // Limit of 256 exceeds the 128 outlinks available, so all are kept
+    conf.setInt("db.max.outlinks.per.page", 256);
+    data = (ParseData) WritableTestUtils.writeRead(original, conf);
+    assertEquals(outlinks.length, data.getOutlinks().length);
+    // All Outlinks
+    conf.setInt("db.max.outlinks.per.page", -1);
+    data = (ParseData) WritableTestUtils.writeRead(original, conf);
+    assertEquals(outlinks.length, data.getOutlinks().length);
+  }
 }

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java?rev=391958&r1=391957&r2=391958&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java Thu Apr  6 03:49:40 2006
@@ -31,6 +31,14 @@
   /** Utility method for testing writables. */
   public static void testWritable(Writable before, Configuration conf)
       throws Exception {
+    TestCase.assertEquals(before, writeRead(before, conf));
+  }
+
+  
+  /** Writes {@code before} out, reads the data back in, and returns the resulting writable. */
+  public static Writable writeRead(Writable before, Configuration conf)
+    throws Exception {
+    
     DataOutputBuffer dob = new DataOutputBuffer();
     before.write(dob);
     
@@ -42,8 +50,7 @@
       ((Configurable)after).setConf(conf);
     }
     after.readFields(dib);
-
-    TestCase.assertEquals(before, after);
+    return after;
   }
-	
+  
 }