You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/07/24 17:27:20 UTC

svn commit: r425092 - in /lucene/nutch/trunk: conf/nutch-default.xml src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java

Author: ab
Date: Mon Jul 24 08:27:20 2006
New Revision: 425092

URL: http://svn.apache.org/viewvc?rev=425092&view=rev
Log:
Apply NUTCH-324, and clarify documentation in nutch-default.xml.

Modified:
    lucene/nutch/trunk/conf/nutch-default.xml
    lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java

Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/nutch-default.xml?rev=425092&r1=425091&r2=425092&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Mon Jul 24 08:27:20 2006
@@ -266,7 +266,8 @@
   <name>db.score.link.external</name>
   <value>1.0</value>
   <description>The score factor for new pages added due to a link from
-  another host relative to the referencing page's score.
+  another host relative to the referencing page's score. Scoring plugins
+  may use this value to affect initial scores of external links.
   </description>
 </property>
 
@@ -274,7 +275,8 @@
   <name>db.score.link.internal</name>
   <value>1.0</value>
   <description>The score factor for pages added due to a link from the
-  same host, relative to the referencing page's score.
+  same host, relative to the referencing page's score. Scoring plugins
+  may use this value to affect initial scores of internal links.
   </description>
 </property>
 

Modified: lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java?rev=425092&r1=425091&r2=425092&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java Mon Jul 24 08:27:20 2006
@@ -16,6 +16,8 @@
 
 package org.apache.nutch.scoring.opic;
 
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.util.List;
 
 // Commons Logging imports
@@ -53,6 +55,8 @@
   private Configuration conf;
   private float scoreInjected;
   private float scorePower;
+  private float internalScoreFactor;
+  private float externalScoreFactor;
   private boolean countFiltered;
 
   public Configuration getConf() {
@@ -63,6 +67,8 @@
     this.conf = conf;
     scoreInjected = conf.getFloat("db.score.injected", 1.0f);
     scorePower = conf.getFloat("indexer.score.power", 0.5f);
+    internalScoreFactor = conf.getFloat("db.score.link.internal", 1.0f);
+    externalScoreFactor = conf.getFloat("db.score.link.external", 1.0f);
     countFiltered = conf.getBoolean("db.score.count.filtered", false);
   }
 
@@ -112,6 +118,19 @@
       score /= allCount;
     } else {
       score /= validCount;
+    }
+    // internal or external score factor 
+    try {
+      String toHost = new URL(toUrl.toString()).getHost();
+      String fromHost = new URL(fromUrl.toString()).getHost();
+      if(toHost.equalsIgnoreCase(fromHost)){
+        score *= internalScoreFactor;
+      } else {
+        score *= externalScoreFactor;
+      }
+    } catch (MalformedURLException e) {
+       e.printStackTrace(LogUtil.getWarnStream(LOG));
+       score *= externalScoreFactor;
     }
     target.setScore(score);
     // XXX (ab) no adjustment? I think this is contrary to the algorithm descr.