You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/11/04 13:33:59 UTC
svn commit: r832725 -
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
Author: srowen
Date: Wed Nov 4 12:33:59 2009
New Revision: 832725
URL: http://svn.apache.org/viewvc?rev=832725&view=rev
Log:
Fix rank assignment in Spearman correlation -- was giving bad similarity values
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java?rev=832725&r1=832724&r2=832725&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java Wed Nov 4 12:33:59 2009
@@ -54,17 +54,30 @@
return Double.NaN;
}
+ // Copy prefs since we need to modify pref values to ranks
xPrefs = xPrefs.clone();
yPrefs = yPrefs.clone();
-
+
+ // First sort by values from low to high
xPrefs.sortByValue();
yPrefs.sortByValue();
+ // Assign ranks from low to high
+ float nextRank = 1.0f;
for (int i = 0; i < xLength; i++) {
- xPrefs.setValue(i, i);
+ // ... but only for items that are common to both pref arrays
+ if (yPrefs.hasPrefWithItemID(xPrefs.getItemID(i))) {
+ xPrefs.setValue(i, nextRank);
+ nextRank += 1.0f;
+ }
+ // Values of items not present in both arrays are left unranked (bogus), but they don't matter
}
+ nextRank = 1.0f;
for (int i = 0; i < yLength; i++) {
- yPrefs.setValue(i, i);
+ if (xPrefs.hasPrefWithItemID(yPrefs.getItemID(i))) {
+ yPrefs.setValue(i, nextRank);
+ nextRank += 1.0f;
+ }
}
xPrefs.sortByItem();
@@ -99,7 +112,12 @@
}
}
- return 1.0 - (6.0 * sumXYRankDiff2 / count / (count*count - 1));
+ if (count <= 1) {
+ return Double.NaN;
+ }
+
+ // When ranks are unique (no ties), this formula equals the Pearson correlation of the ranks
+ return 1.0 - (6.0 * sumXYRankDiff2 / (count * (count*count - 1)));
}
@Override