You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/04/28 22:15:10 UTC
svn commit: r939074 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity:
AbstractSimilarity.java EuclideanDistanceSimilarity.java
PearsonCorrelationSimilarity.java UncenteredCosineSimilarity.java
Author: srowen
Date: Wed Apr 28 20:15:09 2010
New Revision: 939074
URL: http://svn.apache.org/viewvc?rev=939074&view=rev
Log:
MAHOUT-387
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java
- copied, changed from r938784, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java?rev=939074&r1=939073&r2=939074&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java Wed Apr 28 20:15:09 2010
@@ -40,30 +40,23 @@ abstract class AbstractSimilarity implem
private PreferenceTransform prefTransform;
private SimilarityTransform similarityTransform;
private final boolean weighted;
+ private final boolean centerData;
private int cachedNumItems;
private int cachedNumUsers;
private final RefreshHelper refreshHelper;
/**
* <p>
- * Creates a normal (unweighted) .
+ * Creates a possibly weighted AbstractSimilarity.
* </p>
*/
- AbstractSimilarity(DataModel dataModel) throws TasteException {
- this(dataModel, Weighting.UNWEIGHTED);
- }
-
- /**
- * <p>
- * Creates a possibly weighted .
- * </p>
- */
- AbstractSimilarity(final DataModel dataModel, Weighting weighting) throws TasteException {
+ AbstractSimilarity(final DataModel dataModel, Weighting weighting, boolean centerData) throws TasteException {
if (dataModel == null) {
throw new IllegalArgumentException("dataModel is null");
}
this.dataModel = dataModel;
this.weighted = weighting == Weighting.WEIGHTED;
+ this.centerData = centerData;
this.cachedNumItems = dataModel.getNumItems();
this.cachedNumUsers = dataModel.getNumUsers();
this.refreshHelper = new RefreshHelper(new Callable<Object>() {
@@ -304,19 +297,23 @@ abstract class AbstractSimilarity implem
yIndex = yPrefs.getUserID(yPrefIndex);
}
}
-
- // See comments above on these computations
- double n = count;
- double meanX = sumX / n;
- double meanY = sumY / n;
- // double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
- double centeredSumXY = sumXY - meanY * sumX;
- // double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
- double centeredSumX2 = sumX2 - meanX * sumX;
- // double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
- double centeredSumY2 = sumY2 - meanY * sumY;
-
- double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
+
+ double result;
+ if (centerData) {
+ // See comments above on these computations
+ double n = (double) count;
+ double meanX = sumX / n;
+ double meanY = sumY / n;
+ // double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
+ double centeredSumXY = sumXY - meanY * sumX;
+ // double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
+ double centeredSumX2 = sumX2 - meanX * sumX;
+ // double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
+ double centeredSumY2 = sumY2 - meanY * sumY;
+ result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
+ } else {
+ result = computeResult(count, sumXY, sumX2, sumY2, sumXYdiff2);
+ }
if (similarityTransform != null) {
result = similarityTransform.transformSimilarity(itemID1, itemID2, result);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java?rev=939074&r1=939073&r2=939074&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java Wed Apr 28 20:15:09 2010
@@ -36,17 +36,14 @@ public final class EuclideanDistanceSimi
* @throws IllegalArgumentException if {@link DataModel} does not have preference values
*/
public EuclideanDistanceSimilarity(DataModel dataModel) throws TasteException {
- super(dataModel);
- if (!dataModel.hasPreferenceValues()) {
- throw new IllegalArgumentException("DataModel doesn't have preference values");
- }
+ this(dataModel, Weighting.WEIGHTED);
}
/**
* @throws IllegalArgumentException if {@link DataModel} does not have preference values
*/
public EuclideanDistanceSimilarity(DataModel dataModel, Weighting weighting) throws TasteException {
- super(dataModel, weighting);
+ super(dataModel, weighting, false);
if (!dataModel.hasPreferenceValues()) {
throw new IllegalArgumentException("DataModel doesn't have preference values");
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java?rev=939074&r1=939073&r2=939074&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java Wed Apr 28 20:15:09 2010
@@ -46,10 +46,14 @@ import org.apache.mahout.cf.taste.model.
* </p>
*
* <p>
- * This correlation implementation is equivalent to the cosine measure correlation since the data it receives
+ * This correlation implementation is equivalent to the cosine similarity since the data it receives
* is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the angle
* between the two vectors defined by the users' preference values.
* </p>
+ *
+ * <p>
+ * For cosine similarity on uncentered data, see {@link UncenteredCosineSimilarity}.
+ * </p>
*/
public final class PearsonCorrelationSimilarity extends AbstractSimilarity {
@@ -57,17 +61,14 @@ public final class PearsonCorrelationSim
* @throws IllegalArgumentException if {@link DataModel} does not have preference values
*/
public PearsonCorrelationSimilarity(DataModel dataModel) throws TasteException {
- super(dataModel);
- if (!dataModel.hasPreferenceValues()) {
- throw new IllegalArgumentException("DataModel doesn't have preference values");
- }
+ this(dataModel, Weighting.WEIGHTED);
}
/**
* @throws IllegalArgumentException if {@link DataModel} does not have preference values
*/
public PearsonCorrelationSimilarity(DataModel dataModel, Weighting weighting) throws TasteException {
- super(dataModel, weighting);
+ super(dataModel, weighting, true);
if (!dataModel.hasPreferenceValues()) {
throw new IllegalArgumentException("DataModel doesn't have preference values");
}
Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java (from r938784, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java&r1=938784&r2=939074&rev=939074&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java Wed Apr 28 20:15:09 2010
@@ -23,63 +23,40 @@ import org.apache.mahout.cf.taste.model.
/**
* <p>
- * An implementation of the Pearson correlation. For users X and Y, the following values are calculated:
+ * An implementation of the cosine similarity. The result is the cosine of the angle formed between
+ * the two preference vectors.
* </p>
- *
- * <ul>
- * <li>sumX2: sum of the square of all X's preference values</li>
- * <li>sumY2: sum of the square of all Y's preference values</li>
- * <li>sumXY: sum of the product of X and Y's preference value for all items for which both X and Y express a
- * preference</li>
- * </ul>
- *
+ *
* <p>
- * The correlation is then:
- *
- * <p>
- * <code>sumXY / sqrt(sumX2 * sumY2)</code>
- * </p>
- *
- * <p>
- * Note that this correlation "centers" its data, shifts the user's preference values so that each of their
- * means is 0. This is necessary to achieve expected behavior on all data sets.
- * </p>
- *
- * <p>
- * This correlation implementation is equivalent to the cosine measure correlation since the data it receives
- * is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the angle
- * between the two vectors defined by the users' preference values.
+ * Note that this similarity does <em>not</em> "center" its data; that is, it does not shift the user's preference
+ * values so that each of their means is 0. For that behavior, use {@link PearsonCorrelationSimilarity}, which is
+ * mathematically equivalent to this measure when the data is centered.
* </p>
*/
-public final class PearsonCorrelationSimilarity extends AbstractSimilarity {
+public final class UncenteredCosineSimilarity extends AbstractSimilarity {
/**
* @throws IllegalArgumentException if {@link DataModel} does not have preference values
*/
- public PearsonCorrelationSimilarity(DataModel dataModel) throws TasteException {
- super(dataModel);
- if (!dataModel.hasPreferenceValues()) {
- throw new IllegalArgumentException("DataModel doesn't have preference values");
- }
+ public UncenteredCosineSimilarity(DataModel dataModel) throws TasteException {
+ this(dataModel, Weighting.WEIGHTED);
}
/**
* @throws IllegalArgumentException if {@link DataModel} does not have preference values
*/
- public PearsonCorrelationSimilarity(DataModel dataModel, Weighting weighting) throws TasteException {
- super(dataModel, weighting);
+ public UncenteredCosineSimilarity(DataModel dataModel, Weighting weighting) throws TasteException {
+ super(dataModel, weighting, false);
if (!dataModel.hasPreferenceValues()) {
throw new IllegalArgumentException("DataModel doesn't have preference values");
}
}
-
+
@Override
double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
if (n == 0) {
return Double.NaN;
}
- // Note that sum of X and sum of Y don't appear here since they are assumed to be 0;
- // the data is assumed to be centered.
double denominator = Math.sqrt(sumX2) * Math.sqrt(sumY2);
if (denominator == 0.0) {
// One or both parties has -all- the same ratings;
@@ -88,5 +65,5 @@ public final class PearsonCorrelationSim
}
return sumXY / denominator;
}
-
-}
+
+}
\ No newline at end of file