You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/04/06 12:49:44 UTC
svn commit: r391958 - in /lucene/nutch/trunk: conf/nutch-default.xml
src/java/org/apache/nutch/parse/ParseData.java
src/test/org/apache/nutch/parse/TestParseData.java
src/test/org/apache/nutch/util/WritableTestUtils.java
Author: jerome
Date: Thu Apr 6 03:49:40 2006
New Revision: 391958
URL: http://svn.apache.org/viewcvs?rev=391958&view=rev
Log:
NUTCH-244: db.max.outlinks.per.page can now be set to a negative value to disable the limit on the number of outlinks handled per page
Modified:
lucene/nutch/trunk/conf/nutch-default.xml
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java
Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=391958&r1=391957&r2=391958&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Thu Apr 6 03:49:40 2006
@@ -255,6 +255,8 @@
<name>db.max.outlinks.per.page</name>
<value>100</value>
<description>The maximum number of outlinks that we'll process for a page.
+ If this value is nonnegative (>=0), at most db.max.outlinks.per.page outlinks
+ will be processed for a page; otherwise, all outlinks will be processed.
</description>
</property>
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java?rev=391958&r1=391957&r2=391958&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java Thu Apr 6 03:49:40 2006
@@ -119,12 +119,15 @@
int totalOutlinks = in.readInt(); // read outlinks
int maxOutlinksPerPage = this.conf.getInt("db.max.outlinks.per.page", 100);
- int outlinksToRead = Math.min(maxOutlinksPerPage, totalOutlinks);
+ int outlinksToRead = totalOutlinks;
+ if (maxOutlinksPerPage >= 0) {
+ outlinksToRead = Math.min(maxOutlinksPerPage, totalOutlinks);
+ }
outlinks = new Outlink[outlinksToRead];
for (int i = 0; i < outlinksToRead; i++) {
outlinks[i] = Outlink.read(in);
}
- for (int i = maxOutlinksPerPage; i < totalOutlinks; i++) {
+ for (int i = outlinksToRead; i < totalOutlinks; i++) {
Outlink.skip(in);
}
Modified: lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java?rev=391958&r1=391957&r2=391958&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java Thu Apr 6 03:49:40 2006
@@ -51,4 +51,31 @@
WritableTestUtils.testWritable(r, conf);
}
+ public void testMaxOutlinks() throws Exception {
+ Outlink[] outlinks = new Outlink[128];
+ for (int i=0; i<outlinks.length; i++) {
+ outlinks[i] = new Outlink("http://outlink.com/" + i, "Outlink" + i, conf);
+ }
+ ParseData original = new ParseData(ParseStatus.STATUS_SUCCESS,
+ "Max Outlinks Title",
+ outlinks,
+ new Metadata());
+ Configuration conf = NutchConfiguration.create();
+ // No Outlinks
+ conf.setInt("db.max.outlinks.per.page", 0);
+ ParseData data = (ParseData) WritableTestUtils.writeRead(original, conf);
+ assertEquals(0, data.getOutlinks().length);
+ // Only 100 Outlinks
+ conf.setInt("db.max.outlinks.per.page", 100);
+ data = (ParseData) WritableTestUtils.writeRead(original, conf);
+ assertEquals(100, data.getOutlinks().length);
+ // Limit of 256 exceeds the 128 available outlinks, so all of them are kept
+ conf.setInt("db.max.outlinks.per.page", 256);
+ data = (ParseData) WritableTestUtils.writeRead(original, conf);
+ assertEquals(outlinks.length, data.getOutlinks().length);
+ // All Outlinks
+ conf.setInt("db.max.outlinks.per.page", -1);
+ data = (ParseData) WritableTestUtils.writeRead(original, conf);
+ assertEquals(outlinks.length, data.getOutlinks().length);
+ }
}
Modified: lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java?rev=391958&r1=391957&r2=391958&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java Thu Apr 6 03:49:40 2006
@@ -31,6 +31,14 @@
/** Utility method for testing writables. */
public static void testWritable(Writable before, Configuration conf)
throws Exception {
+ TestCase.assertEquals(before, writeRead(before, conf));
+ }
+
+
+ /** Utility method for testing writables. */
+ public static Writable writeRead(Writable before, Configuration conf)
+ throws Exception {
+
DataOutputBuffer dob = new DataOutputBuffer();
before.write(dob);
@@ -42,8 +50,7 @@
((Configurable)after).setConf(conf);
}
after.readFields(dib);
-
- TestCase.assertEquals(before, after);
+ return after;
}
-
+
}