You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/06/09 23:19:56 UTC
svn commit: r189821 - /lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java
Author: cutting
Date: Thu Jun 9 14:19:55 2005
New Revision: 189821
URL: http://svn.apache.org/viewcvs?rev=189821&view=rev
Log:
Compute inlink count after domain-deduping anchors, as in trunk.
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java?rev=189821&r1=189820&r2=189821&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Indexer.java Thu Jun 9 14:19:55 2005
@@ -149,6 +149,7 @@
Document doc = new Document();
Properties meta = parseData.getMetadata();
+ String[] anchors = inlinks!=null ? inlinks.getAnchors() : new String[0];
// add segment, used to map from merged index back to segment files
doc.add(Field.UnIndexed("segment",
@@ -160,7 +161,7 @@
// compute boost
float boost =
IndexSegment.calculateBoost(1.0f, scorePower, boostByLinkCount,
- inlinks == null ? 0 : inlinks.size());
+ anchors.length);
// apply boost to all indexed fields.
doc.setBoost(boost);
// store boost for use by explain and dedup
@@ -176,7 +177,6 @@
try {
// dummy up a FetcherOutput so that we can use existing indexing filters
// TODO: modify IndexingFilter interface to use Inlinks, etc.
- String[] anchors = inlinks!=null ? inlinks.getAnchors() : new String[0];
FetcherOutput fo =
new FetcherOutput(new FetchListEntry(true,new Page((UTF8)key),anchors),
null, null);