You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/07/24 17:27:20 UTC
svn commit: r425092 - in /lucene/nutch/trunk: conf/nutch-default.xml
src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java
Author: ab
Date: Mon Jul 24 08:27:20 2006
New Revision: 425092
URL: http://svn.apache.org/viewvc?rev=425092&view=rev
Log:
Apply NUTCH-324, and clarify documentation in nutch-default.xml.
Modified:
lucene/nutch/trunk/conf/nutch-default.xml
lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java
Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/nutch-default.xml?rev=425092&r1=425091&r2=425092&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Mon Jul 24 08:27:20 2006
@@ -266,7 +266,8 @@
<name>db.score.link.external</name>
<value>1.0</value>
<description>The score factor for new pages added due to a link from
- another host relative to the referencing page's score.
+ another host relative to the referencing page's score. Scoring plugins
+ may use this value to affect initial scores of external links.
</description>
</property>
@@ -274,7 +275,8 @@
<name>db.score.link.internal</name>
<value>1.0</value>
<description>The score factor for pages added due to a link from the
- same host, relative to the referencing page's score.
+ same host, relative to the referencing page's score. Scoring plugins
+ may use this value to affect initial scores of internal links.
</description>
</property>
Modified: lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java?rev=425092&r1=425091&r2=425092&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java Mon Jul 24 08:27:20 2006
@@ -16,6 +16,8 @@
package org.apache.nutch.scoring.opic;
+import java.net.MalformedURLException;
+import java.net.URL;
import java.util.List;
// Commons Logging imports
@@ -53,6 +55,8 @@
private Configuration conf;
private float scoreInjected;
private float scorePower;
+ private float internalScoreFactor;
+ private float externalScoreFactor;
private boolean countFiltered;
public Configuration getConf() {
@@ -63,6 +67,8 @@
this.conf = conf;
scoreInjected = conf.getFloat("db.score.injected", 1.0f);
scorePower = conf.getFloat("indexer.score.power", 0.5f);
+ internalScoreFactor = conf.getFloat("db.score.link.internal", 1.0f);
+ externalScoreFactor = conf.getFloat("db.score.link.external", 1.0f);
countFiltered = conf.getBoolean("db.score.count.filtered", false);
}
@@ -112,6 +118,19 @@
score /= allCount;
} else {
score /= validCount;
+ }
+ // internal or external score factor
+ try {
+ String toHost = new URL(toUrl.toString()).getHost();
+ String fromHost = new URL(fromUrl.toString()).getHost();
+ if(toHost.equalsIgnoreCase(fromHost)){
+ score *= internalScoreFactor;
+ } else {
+ score *= externalScoreFactor;
+ }
+ } catch (MalformedURLException e) {
+ e.printStackTrace(LogUtil.getWarnStream(LOG));
+ score *= externalScoreFactor;
}
target.setScore(score);
// XXX (ab) no adjustment? I think this is contrary to the algorithm descr.