You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/11/04 13:33:59 UTC

svn commit: r832725 - /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java

Author: srowen
Date: Wed Nov  4 12:33:59 2009
New Revision: 832725

URL: http://svn.apache.org/viewvc?rev=832725&view=rev
Log:
Fix rank assignment in Spearman correlation -- the previous assignment was producing incorrect similarity values

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java?rev=832725&r1=832724&r2=832725&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java Wed Nov  4 12:33:59 2009
@@ -54,17 +54,30 @@
       return Double.NaN;
     }
 
+    // Copy prefs since we need to modify pref values to ranks
     xPrefs = xPrefs.clone();
     yPrefs = yPrefs.clone();
-    
+
+    // First sort by values from low to high
     xPrefs.sortByValue();
     yPrefs.sortByValue();
 
+    // Assign ranks from low to high
+    float nextRank = 1.0f;
     for (int i = 0; i < xLength; i++) {
-      xPrefs.setValue(i, i);
+      // ... but only for items that are common to both pref arrays
+      if (yPrefs.hasPrefWithItemID(xPrefs.getItemID(i))) {
+        xPrefs.setValue(i, nextRank);
+        nextRank += 1.0f;
+      }
+      // Other values are bogus but don't matter
     }
+    nextRank = 1.0f;
     for (int i = 0; i < yLength; i++) {
-      yPrefs.setValue(i, i);
+      if (xPrefs.hasPrefWithItemID(yPrefs.getItemID(i))) {
+        yPrefs.setValue(i, nextRank);
+        nextRank += 1.0f;
+      }
     }
 
     xPrefs.sortByItem();
@@ -99,7 +112,12 @@
       }
     }
 
-    return 1.0 - (6.0 * sumXYRankDiff2 / count / (count*count - 1));
+    if (count <= 1) {
+      return Double.NaN;
+    }
+
+    // When ranks are unique, this formula is equivalent to the Pearson correlation of the ranks
+    return 1.0 - (6.0 * sumXYRankDiff2 / (count * (count*count - 1)));
   }
 
   @Override