You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/05/31 00:06:09 UTC

svn commit: r661892 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/impl/correlation/ test/java/org/apache/mahout/cf/taste/impl/ test/java/org/apache/mahout/cf/taste/impl/correlation/

Author: srowen
Date: Fri May 30 15:06:08 2008
New Revision: 661892

URL: http://svn.apache.org/viewvc?rev=661892&view=rev
Log:
Added EuclideanDistanceCorrelation, TanimotoCoefficientCorrelation, plus tests, having been inspired by the book "Collective Intelligence"

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java
      - copied, changed from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java
      - copied, changed from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java
      - copied, changed from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java
      - copied, changed from r657697, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java
      - copied, changed from r657697, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java (from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AbstractCorrelation.java Fri May 30 15:06:08 2008
@@ -17,46 +17,27 @@
 
 package org.apache.mahout.cf.taste.impl.correlation;
 
-import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
 import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
 import org.apache.mahout.cf.taste.correlation.UserCorrelation;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.Item;
 import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
 import org.apache.mahout.cf.taste.transforms.PreferenceTransform2;
+import org.apache.mahout.cf.taste.common.TasteException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values
- * are calculated:</p>
- *
- * <ul>
- * <li>sumX2: sum of the square of all X's preference values</li>
- * <li>sumY2: sum of the square of all Y's preference values</li>
- * <li>sumXY: sum of the product of X and Y's preference value for all items for which both
- * X and Y express a preference</li>
- * </ul>
- *
- * <p>The correlation is then:
- *
- * <p><code>sumXY / sqrt(sumX2 * sumY2)</code></p>
- *
- * <p>where <code>size</code> is the number of {@link Item}s in the {@link DataModel}.</p>
- *
- * <p>Note that this correlation "centers" its data, shifts the user's preference values so that
- * each of their means is 0. This is necessary to achieve expected behavior on all data sets.</p>
- *
- * <p>This correlation implementation is equivalent to the cosine measure correlation since the data it
- * receives is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the
- * angle between the two vectors defined by the users' preference values.</p>
+ * Abstract superclass encapsulating functionality that is common to most
+ * implementations in this package, including the basic correlation algorithm,
+ * normalization, transforms, etc.
  */
-public final class PearsonCorrelation implements UserCorrelation, ItemCorrelation {
+abstract class AbstractCorrelation implements UserCorrelation, ItemCorrelation {
 
-  private static final Logger log = LoggerFactory.getLogger(PearsonCorrelation.class);
+  private static final Logger log = LoggerFactory.getLogger(AbstractCorrelation.class);
 
   private final DataModel dataModel;
   private PreferenceInferrer inferrer;
@@ -65,21 +46,21 @@
   private boolean weighted;
 
   /**
-   * <p>Creates a normal (unweighted) {@link PearsonCorrelation}.</p>
+   * <p>Creates a normal (unweighted) correlation backed by the given {@link DataModel}.</p>
    *
    * @param dataModel
    */
-  public PearsonCorrelation(DataModel dataModel) {
+  public AbstractCorrelation(DataModel dataModel) {
     this(dataModel, false);
   }
 
   /**
-   * <p>Creates a weighted {@link PearsonCorrelation}.</p>
+   * <p>Creates a correlation backed by the given {@link DataModel}, optionally weighted.</p>
    *
    * @param dataModel
    * @param weighted
    */
-  public PearsonCorrelation(DataModel dataModel, boolean weighted) {
+  public AbstractCorrelation(DataModel dataModel, boolean weighted) {
     if (dataModel == null) {
       throw new IllegalArgumentException("dataModel is null");
     }
@@ -87,74 +68,60 @@
     this.weighted = weighted;
   }
 
-  /**
-   * <p>Several subclasses in this package implement this method to actually compute the correlation
-   * from figures computed over users or items. Note that the computations in this class "center" the
-   * data, such that X and Y's mean are 0.</p>
-   *
-   * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
-   * the standard correlation computations as a result.</p>
-   *
-   * @param n total number of users or items
-   * @param sumXY sum of product of user/item preference values, over all items/users prefererred by
-   * both users/items
-   * @param sumX2 sum of the square of user/item preference values, over the first item/user
-   * @param sumY2 sum of the square of the user/item preference values, over the second item/user
-   * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
-   *         can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s
-   */
-  private static double computeResult(int n, double sumXY, double sumX2, double sumY2) {
-    if (n == 0) {
-      return Double.NaN;
-    }
-    // Note that sum of X and sum of Y don't appear here since they are assumed to be 0;
-    // the data is assumed to be centered.
-    double xTerm = Math.sqrt(sumX2);
-    double yTerm = Math.sqrt(sumY2);
-    double denominator = xTerm * yTerm;
-    if (denominator == 0.0) {
-      // One or both parties has -all- the same ratings;
-      // can't really say much correlation under this measure
-      return Double.NaN;
-    }
-    return sumXY / denominator;
-  }
-
-  DataModel getDataModel() {
+  final DataModel getDataModel() {
     return dataModel;
   }
 
-  PreferenceInferrer getPreferenceInferrer() {
+  final PreferenceInferrer getPreferenceInferrer() {
     return inferrer;
   }
 
-  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+  public final void setPreferenceInferrer(PreferenceInferrer inferrer) {
     if (inferrer == null) {
       throw new IllegalArgumentException("inferrer is null");
     }
     this.inferrer = inferrer;
   }
 
-  public PreferenceTransform2 getPrefTransform() {
+  public final PreferenceTransform2 getPrefTransform() {
     return prefTransform;
   }
 
-  public void setPrefTransform(PreferenceTransform2 prefTransform) {
+  public final void setPrefTransform(PreferenceTransform2 prefTransform) {
     this.prefTransform = prefTransform;
   }
 
-  public CorrelationTransform<?> getCorrelationTransform() {
+  public final CorrelationTransform<Object> getCorrelationTransform() {
     return correlationTransform;
   }
 
-  public void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
+  public final void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
     this.correlationTransform = correlationTransform;
   }
 
-  boolean isWeighted() {
+  final boolean isWeighted() {
     return weighted;
   }
 
+  /**
+   * <p>Several subclasses in this package implement this method to actually compute the correlation
+   * from figures computed over users or items. Note that the computations in this class "center" the
+   * data, such that X and Y's mean are 0.</p>
+   *
+   * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
+   * the standard correlation computations as a result.</p>
+   *
+   * @param n number of user/item preference pairs that were tallied into the sums below
+   * @param sumXY sum of product of user/item preference values, over all items/users preferred by
+   * both users/items
+   * @param sumX2 sum of the square of user/item preference values, over the first item/user
+   * @param sumY2 sum of the square of the user/item preference values, over the second item/user
+   * @param sumXYdiff2 sum of squares of differences in X and Y values (raw values, not centered)
+   * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
+   *         can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s)
+   */
+  abstract double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2);
+
   public double userCorrelation(User user1, User user2) throws TasteException {
 
     if (user1 == null || user2 == null) {
@@ -180,6 +147,7 @@
     double sumY = 0.0;
     double sumY2 = 0.0;
     double sumXY = 0.0;
+    double sumXYdiff2 = 0.0;
     int count = 0;
 
     boolean hasInferrer = inferrer != null;
@@ -226,6 +194,8 @@
         sumX2 += x * x;
         sumY += y;
         sumY2 += y * y;
+        double diff = x - y;
+        sumXYdiff2 += diff * diff;
         count++;
       }
       if (compare <= 0) {
@@ -252,7 +222,7 @@
     double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
     double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
 
-    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
+    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
 
     if (correlationTransform != null) {
       result = correlationTransform.transformCorrelation(user1, user2, result);
@@ -268,7 +238,7 @@
     return result;
   }
 
-  public double itemCorrelation(Item item1, Item item2) throws TasteException {
+  public final double itemCorrelation(Item item1, Item item2) throws TasteException {
 
     if (item1 == null || item2 == null) {
       throw new IllegalArgumentException("item1 or item2 is null");
@@ -293,6 +263,7 @@
     double sumY = 0.0;
     double sumY2 = 0.0;
     double sumXY = 0.0;
+    double sumXYdiff2 = 0.0;
     int count = 0;
 
     // No, pref inferrers and transforms don't appy here. I think.
@@ -308,6 +279,8 @@
         sumX2 += x * x;
         sumY += y;
         sumY2 += y * y;
+        double diff = x - y;
+        sumXYdiff2 += diff * diff;
         count++;
       }
       if (compare <= 0) {
@@ -334,7 +307,7 @@
     double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
     double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
 
-    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
+    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
 
     if (correlationTransform != null) {
       result = correlationTransform.transformCorrelation(item1, item2, result);
@@ -345,12 +318,12 @@
     }
 
     if (log.isTraceEnabled()) {
-      log.trace("UserCorrelation between " + item1 + " and " + item2 + " is " + result);
+      log.trace("ItemCorrelation between " + item1 + " and " + item2 + " is " + result);
     }
     return result;
   }
 
-  private double normalizeWeightResult(double result, int count, int num) {
+  final double normalizeWeightResult(double result, int count, int num) {
     if (weighted) {
       double scaleFactor = 1.0 - (double) count / (double) (num + 1);
       if (result < 0.0) {
@@ -368,7 +341,7 @@
     return result;
   }
 
-  public void refresh() {
+  public final void refresh() {
     dataModel.refresh();
     if (inferrer != null) {
       inferrer.refresh();
@@ -382,8 +355,8 @@
   }
 
   @Override
-  public String toString() {
-    return "PearsonCorrelation[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
+  public final String toString() {
+    return this.getClass().getSimpleName() + "[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
   }
 
-}
+}
\ No newline at end of file

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java (from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelation.java Fri May 30 15:06:08 2008
@@ -17,373 +17,39 @@
 
 package org.apache.mahout.cf.taste.impl.correlation;
 
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
-import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
-import org.apache.mahout.cf.taste.correlation.UserCorrelation;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
-import org.apache.mahout.cf.taste.transforms.PreferenceTransform2;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
- * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values
- * are calculated:</p>
- *
- * <ul>
- * <li>sumX2: sum of the square of all X's preference values</li>
- * <li>sumY2: sum of the square of all Y's preference values</li>
- * <li>sumXY: sum of the product of X and Y's preference value for all items for which both
- * X and Y express a preference</li>
- * </ul>
- *
- * <p>The correlation is then:
- *
- * <p><code>sumXY / sqrt(sumX2 * sumY2)</code></p>
- *
- * <p>where <code>size</code> is the number of {@link Item}s in the {@link DataModel}.</p>
- *
- * <p>Note that this correlation "centers" its data, shifts the user's preference values so that
- * each of their means is 0. This is necessary to achieve expected behavior on all data sets.</p>
- *
- * <p>This correlation implementation is equivalent to the cosine measure correlation since the data it
- * receives is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the
- * angle between the two vectors defined by the users' preference values.</p>
+ * <p>An implementation of a "correlation" based on the Euclidean "distance" between two
+ * {@link org.apache.mahout.cf.taste.model.User}s X and Y. Thinking of items as dimensions
+ * and preferences as points along those dimensions, a distance is computed using all
+ * items (dimensions) where both users have expressed a preference for that item. This
+ * is the square root of the sum of the squares of differences in position (preference) along each
+ * dimension, where the sum is first scaled by preference magnitude and the root is divided by the
+ * number of shared items. The correlation is 1 / (1 + distance), so values are in the range (0,1].</p>
  */
-public final class PearsonCorrelation implements UserCorrelation, ItemCorrelation {
+public final class EuclideanDistanceCorrelation extends AbstractCorrelation {
 
-  private static final Logger log = LoggerFactory.getLogger(PearsonCorrelation.class);
-
-  private final DataModel dataModel;
-  private PreferenceInferrer inferrer;
-  private PreferenceTransform2 prefTransform;
-  private CorrelationTransform<Object> correlationTransform;
-  private boolean weighted;
-
-  /**
-   * <p>Creates a normal (unweighted) {@link PearsonCorrelation}.</p>
-   *
-   * @param dataModel
-   */
-  public PearsonCorrelation(DataModel dataModel) {
-    this(dataModel, false);
+  public EuclideanDistanceCorrelation(DataModel dataModel) {
+    super(dataModel);
   }
 
-  /**
-   * <p>Creates a weighted {@link PearsonCorrelation}.</p>
-   *
-   * @param dataModel
-   * @param weighted
-   */
-  public PearsonCorrelation(DataModel dataModel, boolean weighted) {
-    if (dataModel == null) {
-      throw new IllegalArgumentException("dataModel is null");
-    }
-    this.dataModel = dataModel;
-    this.weighted = weighted;
+  public EuclideanDistanceCorrelation(DataModel dataModel, boolean weighted) {
+    super(dataModel, weighted);
   }
 
-  /**
-   * <p>Several subclasses in this package implement this method to actually compute the correlation
-   * from figures computed over users or items. Note that the computations in this class "center" the
-   * data, such that X and Y's mean are 0.</p>
-   *
-   * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
-   * the standard correlation computations as a result.</p>
-   *
-   * @param n total number of users or items
-   * @param sumXY sum of product of user/item preference values, over all items/users prefererred by
-   * both users/items
-   * @param sumX2 sum of the square of user/item preference values, over the first item/user
-   * @param sumY2 sum of the square of the user/item preference values, over the second item/user
-   * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
-   *         can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s
-   */
-  private static double computeResult(int n, double sumXY, double sumX2, double sumY2) {
+  double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
     if (n == 0) {
       return Double.NaN;
     }
-    // Note that sum of X and sum of Y don't appear here since they are assumed to be 0;
-    // the data is assumed to be centered.
-    double xTerm = Math.sqrt(sumX2);
-    double yTerm = Math.sqrt(sumY2);
-    double denominator = xTerm * yTerm;
+    double denominator = Math.sqrt(sumX2) + Math.sqrt(sumY2);
     if (denominator == 0.0) {
-      // One or both parties has -all- the same ratings;
-      // can't really say much correlation under this measure
       return Double.NaN;
     }
-    return sumXY / denominator;
-  }
-
-  DataModel getDataModel() {
-    return dataModel;
-  }
-
-  PreferenceInferrer getPreferenceInferrer() {
-    return inferrer;
-  }
-
-  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
-    if (inferrer == null) {
-      throw new IllegalArgumentException("inferrer is null");
-    }
-    this.inferrer = inferrer;
-  }
-
-  public PreferenceTransform2 getPrefTransform() {
-    return prefTransform;
-  }
-
-  public void setPrefTransform(PreferenceTransform2 prefTransform) {
-    this.prefTransform = prefTransform;
-  }
-
-  public CorrelationTransform<?> getCorrelationTransform() {
-    return correlationTransform;
-  }
-
-  public void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
-    this.correlationTransform = correlationTransform;
-  }
-
-  boolean isWeighted() {
-    return weighted;
-  }
-
-  public double userCorrelation(User user1, User user2) throws TasteException {
-
-    if (user1 == null || user2 == null) {
-      throw new IllegalArgumentException("user1 or user2 is null");
-    }
-
-    Preference[] xPrefs = user1.getPreferencesAsArray();
-    Preference[] yPrefs = user2.getPreferencesAsArray();
-
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
-      return Double.NaN;
-    }
-
-    Preference xPref = xPrefs[0];
-    Preference yPref = yPrefs[0];
-    Item xIndex = xPref.getItem();
-    Item yIndex = yPref.getItem();
-    int xPrefIndex = 1;
-    int yPrefIndex = 1;
-
-    double sumX = 0.0;
-    double sumX2 = 0.0;
-    double sumY = 0.0;
-    double sumY2 = 0.0;
-    double sumXY = 0.0;
-    int count = 0;
-
-    boolean hasInferrer = inferrer != null;
-    boolean hasPrefTransform = prefTransform != null;
-
-    while (true) {
-      int compare = xIndex.compareTo(yIndex);
-      if (hasInferrer || compare == 0) {
-        double x;
-        double y;
-        if (compare == 0) {
-          // Both users expressed a preference for the item
-          if (hasPrefTransform) {
-            x = prefTransform.getTransformedValue(xPref);
-            y = prefTransform.getTransformedValue(yPref);
-          } else {
-            x = xPref.getValue();
-            y = yPref.getValue();
-          }
-        } else {
-          // Only one user expressed a preference, but infer the other one's preference and tally
-          // as if the other user expressed that preference
-          if (compare < 0) {
-            // X has a value; infer Y's
-            if (hasPrefTransform) {
-              x = prefTransform.getTransformedValue(xPref);
-            } else {
-              x = xPref.getValue();
-            }
-            y = inferrer.inferPreference(user2, xIndex);
-          } else {
-            // compare > 0
-            // Y has a value; infer X's
-            x = inferrer.inferPreference(user1, yIndex);
-            if (hasPrefTransform) {
-              y = prefTransform.getTransformedValue(yPref);
-            } else {
-              y = yPref.getValue();
-            }
-          }
-        }
-        sumXY += x * y;
-        sumX += x;
-        sumX2 += x * x;
-        sumY += y;
-        sumY2 += y * y;
-        count++;
-      }
-      if (compare <= 0) {
-        if (xPrefIndex == xPrefs.length) {
-          break;
-        }
-        xPref = xPrefs[xPrefIndex++];
-        xIndex = xPref.getItem();
-      }
-      if (compare >= 0) {
-        if (yPrefIndex == yPrefs.length) {
-          break;
-        }
-        yPref = yPrefs[yPrefIndex++];
-        yIndex = yPref.getItem();
-      }
-    }
-
-    // "Center" the data. If my math is correct, this'll do it.
-    double n = (double) count;
-    double meanX = sumX / n;
-    double meanY = sumY / n;
-    double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
-    double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
-    double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
-
-    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
-    if (correlationTransform != null) {
-      result = correlationTransform.transformCorrelation(user1, user2, result);
-    }
-
-    if (!Double.isNaN(result)) {
-      result = normalizeWeightResult(result, count, dataModel.getNumItems());
-    }
-
-    if (log.isTraceEnabled()) {
-      log.trace("UserCorrelation between " + user1 + " and " + user2 + " is " + result);
-    }
-    return result;
-  }
-
-  public double itemCorrelation(Item item1, Item item2) throws TasteException {
-
-    if (item1 == null || item2 == null) {
-      throw new IllegalArgumentException("item1 or item2 is null");
-    }
-
-    Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(item1.getID());
-    Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(item2.getID());
-
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
-      return Double.NaN;
-    }
-
-    Preference xPref = xPrefs[0];
-    Preference yPref = yPrefs[0];
-    User xIndex = xPref.getUser();
-    User yIndex = yPref.getUser();
-    int xPrefIndex = 1;
-    int yPrefIndex = 1;
-
-    double sumX = 0.0;
-    double sumX2 = 0.0;
-    double sumY = 0.0;
-    double sumY2 = 0.0;
-    double sumXY = 0.0;
-    int count = 0;
-
-    // No, pref inferrers and transforms don't appy here. I think.
-
-    while (true) {
-      int compare = xIndex.compareTo(yIndex);
-      if (compare == 0) {
-        // Both users expressed a preference for the item
-        double x = xPref.getValue();
-        double y = yPref.getValue();
-        sumXY += x * y;
-        sumX += x;
-        sumX2 += x * x;
-        sumY += y;
-        sumY2 += y * y;
-        count++;
-      }
-      if (compare <= 0) {
-        if (xPrefIndex == xPrefs.length) {
-          break;
-        }
-        xPref = xPrefs[xPrefIndex++];
-        xIndex = xPref.getUser();
-      }
-      if (compare >= 0) {
-        if (yPrefIndex == yPrefs.length) {
-          break;
-        }
-        yPref = yPrefs[yPrefIndex++];
-        yIndex = yPref.getUser();
-      }
-    }
-
-    // See comments above on these computations
-    double n = (double) count;
-    double meanX = sumX / n;
-    double meanY = sumY / n;
-    double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
-    double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
-    double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
-
-    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
-    if (correlationTransform != null) {
-      result = correlationTransform.transformCorrelation(item1, item2, result);
-    }
-
-    if (!Double.isNaN(result)) {
-      result = normalizeWeightResult(result, count, dataModel.getNumUsers());
-    }
-
-    if (log.isTraceEnabled()) {
-      log.trace("UserCorrelation between " + item1 + " and " + item2 + " is " + result);
-    }
-    return result;
-  }
-
-  private double normalizeWeightResult(double result, int count, int num) {
-    if (weighted) {
-      double scaleFactor = 1.0 - (double) count / (double) (num + 1);
-      if (result < 0.0) {
-        result = -1.0 + scaleFactor * (1.0 + result);
-      } else {
-        result = 1.0 - scaleFactor * (1.0 - result);
-      }
-    }
-    // Make sure the result is not accidentally a little outside [-1.0, 1.0] due to rounding:
-    if (result < -1.0) {
-      result = -1.0;
-    } else if (result > 1.0) {
-      result = 1.0;
-    }
-    return result;
-  }
-
-  public void refresh() {
-    dataModel.refresh();
-    if (inferrer != null) {
-      inferrer.refresh();
-    }
-    if (prefTransform != null) {
-      prefTransform.refresh();
-    }
-    if (correlationTransform != null) {
-      correlationTransform.refresh();
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "PearsonCorrelation[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
+    // normalize a bit for magnitude
+    sumXYdiff2 /= denominator;
+    // divide by n below to not automatically give users with more overlap more correlation
+    return 1.0 / (1.0 + (Math.sqrt(sumXYdiff2) / (double) n));
   }
 
-}
+}
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java?rev=661892&r1=661891&r2=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java Fri May 30 15:06:08 2008
@@ -17,18 +17,9 @@
 
 package org.apache.mahout.cf.taste.impl.correlation;
 
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
-import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
-import org.apache.mahout.cf.taste.correlation.UserCorrelation;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
-import org.apache.mahout.cf.taste.transforms.PreferenceTransform2;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values
@@ -54,56 +45,17 @@
  * receives is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the
  * angle between the two vectors defined by the users' preference values.</p>
  */
-public final class PearsonCorrelation implements UserCorrelation, ItemCorrelation {
+public final class PearsonCorrelation extends AbstractCorrelation {
 
-  private static final Logger log = LoggerFactory.getLogger(PearsonCorrelation.class);
-
-  private final DataModel dataModel;
-  private PreferenceInferrer inferrer;
-  private PreferenceTransform2 prefTransform;
-  private CorrelationTransform<Object> correlationTransform;
-  private boolean weighted;
-
-  /**
-   * <p>Creates a normal (unweighted) {@link PearsonCorrelation}.</p>
-   *
-   * @param dataModel
-   */
   public PearsonCorrelation(DataModel dataModel) {
-    this(dataModel, false);
+    super(dataModel);
   }
 
-  /**
-   * <p>Creates a weighted {@link PearsonCorrelation}.</p>
-   *
-   * @param dataModel
-   * @param weighted
-   */
   public PearsonCorrelation(DataModel dataModel, boolean weighted) {
-    if (dataModel == null) {
-      throw new IllegalArgumentException("dataModel is null");
-    }
-    this.dataModel = dataModel;
-    this.weighted = weighted;
+    super(dataModel, weighted);
   }
 
-  /**
-   * <p>Several subclasses in this package implement this method to actually compute the correlation
-   * from figures computed over users or items. Note that the computations in this class "center" the
-   * data, such that X and Y's mean are 0.</p>
-   *
-   * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
-   * the standard correlation computations as a result.</p>
-   *
-   * @param n total number of users or items
-   * @param sumXY sum of product of user/item preference values, over all items/users prefererred by
-   * both users/items
-   * @param sumX2 sum of the square of user/item preference values, over the first item/user
-   * @param sumY2 sum of the square of the user/item preference values, over the second item/user
-   * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
-   *         can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s
-   */
-  private static double computeResult(int n, double sumXY, double sumX2, double sumY2) {
+  double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
     if (n == 0) {
       return Double.NaN;
     }
@@ -120,270 +72,4 @@
     return sumXY / denominator;
   }
 
-  DataModel getDataModel() {
-    return dataModel;
-  }
-
-  PreferenceInferrer getPreferenceInferrer() {
-    return inferrer;
-  }
-
-  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
-    if (inferrer == null) {
-      throw new IllegalArgumentException("inferrer is null");
-    }
-    this.inferrer = inferrer;
-  }
-
-  public PreferenceTransform2 getPrefTransform() {
-    return prefTransform;
-  }
-
-  public void setPrefTransform(PreferenceTransform2 prefTransform) {
-    this.prefTransform = prefTransform;
-  }
-
-  public CorrelationTransform<?> getCorrelationTransform() {
-    return correlationTransform;
-  }
-
-  public void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
-    this.correlationTransform = correlationTransform;
-  }
-
-  boolean isWeighted() {
-    return weighted;
-  }
-
-  public double userCorrelation(User user1, User user2) throws TasteException {
-
-    if (user1 == null || user2 == null) {
-      throw new IllegalArgumentException("user1 or user2 is null");
-    }
-
-    Preference[] xPrefs = user1.getPreferencesAsArray();
-    Preference[] yPrefs = user2.getPreferencesAsArray();
-
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
-      return Double.NaN;
-    }
-
-    Preference xPref = xPrefs[0];
-    Preference yPref = yPrefs[0];
-    Item xIndex = xPref.getItem();
-    Item yIndex = yPref.getItem();
-    int xPrefIndex = 1;
-    int yPrefIndex = 1;
-
-    double sumX = 0.0;
-    double sumX2 = 0.0;
-    double sumY = 0.0;
-    double sumY2 = 0.0;
-    double sumXY = 0.0;
-    int count = 0;
-
-    boolean hasInferrer = inferrer != null;
-    boolean hasPrefTransform = prefTransform != null;
-
-    while (true) {
-      int compare = xIndex.compareTo(yIndex);
-      if (hasInferrer || compare == 0) {
-        double x;
-        double y;
-        if (compare == 0) {
-          // Both users expressed a preference for the item
-          if (hasPrefTransform) {
-            x = prefTransform.getTransformedValue(xPref);
-            y = prefTransform.getTransformedValue(yPref);
-          } else {
-            x = xPref.getValue();
-            y = yPref.getValue();
-          }
-        } else {
-          // Only one user expressed a preference, but infer the other one's preference and tally
-          // as if the other user expressed that preference
-          if (compare < 0) {
-            // X has a value; infer Y's
-            if (hasPrefTransform) {
-              x = prefTransform.getTransformedValue(xPref);
-            } else {
-              x = xPref.getValue();
-            }
-            y = inferrer.inferPreference(user2, xIndex);
-          } else {
-            // compare > 0
-            // Y has a value; infer X's
-            x = inferrer.inferPreference(user1, yIndex);
-            if (hasPrefTransform) {
-              y = prefTransform.getTransformedValue(yPref);
-            } else {
-              y = yPref.getValue();
-            }
-          }
-        }
-        sumXY += x * y;
-        sumX += x;
-        sumX2 += x * x;
-        sumY += y;
-        sumY2 += y * y;
-        count++;
-      }
-      if (compare <= 0) {
-        if (xPrefIndex == xPrefs.length) {
-          break;
-        }
-        xPref = xPrefs[xPrefIndex++];
-        xIndex = xPref.getItem();
-      }
-      if (compare >= 0) {
-        if (yPrefIndex == yPrefs.length) {
-          break;
-        }
-        yPref = yPrefs[yPrefIndex++];
-        yIndex = yPref.getItem();
-      }
-    }
-
-    // "Center" the data. If my math is correct, this'll do it.
-    double n = (double) count;
-    double meanX = sumX / n;
-    double meanY = sumY / n;
-    double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
-    double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
-    double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
-
-    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
-    if (correlationTransform != null) {
-      result = correlationTransform.transformCorrelation(user1, user2, result);
-    }
-
-    if (!Double.isNaN(result)) {
-      result = normalizeWeightResult(result, count, dataModel.getNumItems());
-    }
-
-    if (log.isTraceEnabled()) {
-      log.trace("UserCorrelation between " + user1 + " and " + user2 + " is " + result);
-    }
-    return result;
-  }
-
-  public double itemCorrelation(Item item1, Item item2) throws TasteException {
-
-    if (item1 == null || item2 == null) {
-      throw new IllegalArgumentException("item1 or item2 is null");
-    }
-
-    Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(item1.getID());
-    Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(item2.getID());
-
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
-      return Double.NaN;
-    }
-
-    Preference xPref = xPrefs[0];
-    Preference yPref = yPrefs[0];
-    User xIndex = xPref.getUser();
-    User yIndex = yPref.getUser();
-    int xPrefIndex = 1;
-    int yPrefIndex = 1;
-
-    double sumX = 0.0;
-    double sumX2 = 0.0;
-    double sumY = 0.0;
-    double sumY2 = 0.0;
-    double sumXY = 0.0;
-    int count = 0;
-
-    // No, pref inferrers and transforms don't appy here. I think.
-
-    while (true) {
-      int compare = xIndex.compareTo(yIndex);
-      if (compare == 0) {
-        // Both users expressed a preference for the item
-        double x = xPref.getValue();
-        double y = yPref.getValue();
-        sumXY += x * y;
-        sumX += x;
-        sumX2 += x * x;
-        sumY += y;
-        sumY2 += y * y;
-        count++;
-      }
-      if (compare <= 0) {
-        if (xPrefIndex == xPrefs.length) {
-          break;
-        }
-        xPref = xPrefs[xPrefIndex++];
-        xIndex = xPref.getUser();
-      }
-      if (compare >= 0) {
-        if (yPrefIndex == yPrefs.length) {
-          break;
-        }
-        yPref = yPrefs[yPrefIndex++];
-        yIndex = yPref.getUser();
-      }
-    }
-
-    // See comments above on these computations
-    double n = (double) count;
-    double meanX = sumX / n;
-    double meanY = sumY / n;
-    double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
-    double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
-    double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
-
-    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
-    if (correlationTransform != null) {
-      result = correlationTransform.transformCorrelation(item1, item2, result);
-    }
-
-    if (!Double.isNaN(result)) {
-      result = normalizeWeightResult(result, count, dataModel.getNumUsers());
-    }
-
-    if (log.isTraceEnabled()) {
-      log.trace("UserCorrelation between " + item1 + " and " + item2 + " is " + result);
-    }
-    return result;
-  }
-
-  private double normalizeWeightResult(double result, int count, int num) {
-    if (weighted) {
-      double scaleFactor = 1.0 - (double) count / (double) (num + 1);
-      if (result < 0.0) {
-        result = -1.0 + scaleFactor * (1.0 + result);
-      } else {
-        result = 1.0 - scaleFactor * (1.0 - result);
-      }
-    }
-    // Make sure the result is not accidentally a little outside [-1.0, 1.0] due to rounding:
-    if (result < -1.0) {
-      result = -1.0;
-    } else if (result > 1.0) {
-      result = 1.0;
-    }
-    return result;
-  }
-
-  public void refresh() {
-    dataModel.refresh();
-    if (inferrer != null) {
-      inferrer.refresh();
-    }
-    if (prefTransform != null) {
-      prefTransform.refresh();
-    }
-    if (correlationTransform != null) {
-      correlationTransform.refresh();
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "PearsonCorrelation[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
-  }
-
 }

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java (from r657697, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelation.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelation.java Fri May 30 15:06:08 2008
@@ -17,145 +17,46 @@
 
 package org.apache.mahout.cf.taste.impl.correlation;
 
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.Item;
 import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
 import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
 import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
-import org.apache.mahout.cf.taste.correlation.UserCorrelation;
-import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
-import org.apache.mahout.cf.taste.transforms.PreferenceTransform2;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * <p>An implementation of the Pearson correlation. For {@link User}s X and Y, the following values
- * are calculated:</p>
- *
- * <ul>
- * <li>sumX2: sum of the square of all X's preference values</li>
- * <li>sumY2: sum of the square of all Y's preference values</li>
- * <li>sumXY: sum of the product of X and Y's preference value for all items for which both
- * X and Y express a preference</li>
- * </ul>
- *
- * <p>The correlation is then:
- *
- * <p><code>sumXY / sqrt(sumX2 * sumY2)</code></p>
- *
- * <p>where <code>size</code> is the number of {@link Item}s in the {@link DataModel}.</p>
+ * <p>An implementation of a "correlation" based on the
+ * <a href="http://en.wikipedia.org/wiki/Jaccard_index">Tanimoto coefficient</a>, or extended
+ * Jaccard coefficient.</p>
+ *
+ * <p>This is intended for "binary" data sets where a user either expresses a generic "yes" preference
+ * for an item or has no preference. The actual preference values do not matter here, only their presence
+ * or absence.</p>
  *
- * <p>Note that this correlation "centers" its data, shifts the user's preference values so that
- * each of their means is 0. This is necessary to achieve expected behavior on all data sets.</p>
- *
- * <p>This correlation implementation is equivalent to the cosine measure correlation since the data it
- * receives is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the
- * angle between the two vectors defined by the users' preference values.</p>
+ * <p>The value returned is in [0,1].</p>
  */
-public final class PearsonCorrelation implements UserCorrelation, ItemCorrelation {
+public final class TanimotoCoefficientCorrelation implements UserCorrelation, ItemCorrelation {
 
-  private static final Logger log = LoggerFactory.getLogger(PearsonCorrelation.class);
+  private static final Logger log = LoggerFactory.getLogger(TanimotoCoefficientCorrelation.class);
 
   private final DataModel dataModel;
-  private PreferenceInferrer inferrer;
-  private PreferenceTransform2 prefTransform;
-  private CorrelationTransform<Object> correlationTransform;
-  private boolean weighted;
-
-  /**
-   * <p>Creates a normal (unweighted) {@link PearsonCorrelation}.</p>
-   *
-   * @param dataModel
-   */
-  public PearsonCorrelation(DataModel dataModel) {
-    this(dataModel, false);
-  }
 
-  /**
-   * <p>Creates a weighted {@link PearsonCorrelation}.</p>
-   *
-   * @param dataModel
-   * @param weighted
-   */
-  public PearsonCorrelation(DataModel dataModel, boolean weighted) {
-    if (dataModel == null) {
-      throw new IllegalArgumentException("dataModel is null");
-    }
+  public TanimotoCoefficientCorrelation(DataModel dataModel) {
     this.dataModel = dataModel;
-    this.weighted = weighted;
   }
 
   /**
-   * <p>Several subclasses in this package implement this method to actually compute the correlation
-   * from figures computed over users or items. Note that the computations in this class "center" the
-   * data, such that X and Y's mean are 0.</p>
-   *
-   * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
-   * the standard correlation computations as a result.</p>
-   *
-   * @param n total number of users or items
-   * @param sumXY sum of product of user/item preference values, over all items/users prefererred by
-   * both users/items
-   * @param sumX2 sum of the square of user/item preference values, over the first item/user
-   * @param sumY2 sum of the square of the user/item preference values, over the second item/user
-   * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
-   *         can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s
+   * @throws UnsupportedOperationException
    */
-  private static double computeResult(int n, double sumXY, double sumX2, double sumY2) {
-    if (n == 0) {
-      return Double.NaN;
-    }
-    // Note that sum of X and sum of Y don't appear here since they are assumed to be 0;
-    // the data is assumed to be centered.
-    double xTerm = Math.sqrt(sumX2);
-    double yTerm = Math.sqrt(sumY2);
-    double denominator = xTerm * yTerm;
-    if (denominator == 0.0) {
-      // One or both parties has -all- the same ratings;
-      // can't really say much correlation under this measure
-      return Double.NaN;
-    }
-    return sumXY / denominator;
-  }
-
-  DataModel getDataModel() {
-    return dataModel;
-  }
-
-  PreferenceInferrer getPreferenceInferrer() {
-    return inferrer;
-  }
-
   public void setPreferenceInferrer(PreferenceInferrer inferrer) {
-    if (inferrer == null) {
-      throw new IllegalArgumentException("inferrer is null");
-    }
-    this.inferrer = inferrer;
-  }
-
-  public PreferenceTransform2 getPrefTransform() {
-    return prefTransform;
-  }
-
-  public void setPrefTransform(PreferenceTransform2 prefTransform) {
-    this.prefTransform = prefTransform;
+    throw new UnsupportedOperationException();
   }
 
-  public CorrelationTransform<?> getCorrelationTransform() {
-    return correlationTransform;
-  }
-
-  public void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
-    this.correlationTransform = correlationTransform;
-  }
-
-  boolean isWeighted() {
-    return weighted;
-  }
-
-  public double userCorrelation(User user1, User user2) throws TasteException {
+  public final double userCorrelation(User user1, User user2) throws TasteException {
 
     if (user1 == null || user2 == null) {
       throw new IllegalArgumentException("user1 or user2 is null");
@@ -164,9 +65,12 @@
     Preference[] xPrefs = user1.getPreferencesAsArray();
     Preference[] yPrefs = user2.getPreferencesAsArray();
 
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
+    if (xPrefs.length == 0 && yPrefs.length == 0) {
       return Double.NaN;
     }
+    if (xPrefs.length == 0 || yPrefs.length == 0) {
+      return 0.0;
+    }
 
     Preference xPref = xPrefs[0];
     Preference yPref = yPrefs[0];
@@ -175,58 +79,11 @@
     int xPrefIndex = 1;
     int yPrefIndex = 1;
 
-    double sumX = 0.0;
-    double sumX2 = 0.0;
-    double sumY = 0.0;
-    double sumY2 = 0.0;
-    double sumXY = 0.0;
-    int count = 0;
-
-    boolean hasInferrer = inferrer != null;
-    boolean hasPrefTransform = prefTransform != null;
-
+    int intersectionSize = 0;
     while (true) {
       int compare = xIndex.compareTo(yIndex);
-      if (hasInferrer || compare == 0) {
-        double x;
-        double y;
-        if (compare == 0) {
-          // Both users expressed a preference for the item
-          if (hasPrefTransform) {
-            x = prefTransform.getTransformedValue(xPref);
-            y = prefTransform.getTransformedValue(yPref);
-          } else {
-            x = xPref.getValue();
-            y = yPref.getValue();
-          }
-        } else {
-          // Only one user expressed a preference, but infer the other one's preference and tally
-          // as if the other user expressed that preference
-          if (compare < 0) {
-            // X has a value; infer Y's
-            if (hasPrefTransform) {
-              x = prefTransform.getTransformedValue(xPref);
-            } else {
-              x = xPref.getValue();
-            }
-            y = inferrer.inferPreference(user2, xIndex);
-          } else {
-            // compare > 0
-            // Y has a value; infer X's
-            x = inferrer.inferPreference(user1, yIndex);
-            if (hasPrefTransform) {
-              y = prefTransform.getTransformedValue(yPref);
-            } else {
-              y = yPref.getValue();
-            }
-          }
-        }
-        sumXY += x * y;
-        sumX += x;
-        sumX2 += x * x;
-        sumY += y;
-        sumY2 += y * y;
-        count++;
+      if (compare == 0) {
+        intersectionSize++;
       }
       if (compare <= 0) {
         if (xPrefIndex == xPrefs.length) {
@@ -244,31 +101,17 @@
       }
     }
 
-    // "Center" the data. If my math is correct, this'll do it.
-    double n = (double) count;
-    double meanX = sumX / n;
-    double meanY = sumY / n;
-    double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
-    double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
-    double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
+    int unionSize = xPrefs.length + yPrefs.length - intersectionSize;
 
-    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
-    if (correlationTransform != null) {
-      result = correlationTransform.transformCorrelation(user1, user2, result);
-    }
-
-    if (!Double.isNaN(result)) {
-      result = normalizeWeightResult(result, count, dataModel.getNumItems());
-    }
+    double result = (double) intersectionSize / (double) unionSize;
 
     if (log.isTraceEnabled()) {
-      log.trace("UserCorrelation between " + user1 + " and " + user2 + " is " + result);
+      log.trace("User correlation between " + user1 + " and " + user2 + " is " + result);
     }
     return result;
   }
 
-  public double itemCorrelation(Item item1, Item item2) throws TasteException {
+  public final double itemCorrelation(Item item1, Item item2) throws TasteException {
 
     if (item1 == null || item2 == null) {
       throw new IllegalArgumentException("item1 or item2 is null");
@@ -277,9 +120,12 @@
     Preference[] xPrefs = dataModel.getPreferencesForItemAsArray(item1.getID());
     Preference[] yPrefs = dataModel.getPreferencesForItemAsArray(item2.getID());
 
-    if (xPrefs.length == 0 || yPrefs.length == 0) {
+    if (xPrefs.length == 0 && yPrefs.length == 0) {
       return Double.NaN;
     }
+    if (xPrefs.length == 0 || yPrefs.length == 0) {
+      return 0.0;
+    }
 
     Preference xPref = xPrefs[0];
     Preference yPref = yPrefs[0];
@@ -288,27 +134,11 @@
     int xPrefIndex = 1;
     int yPrefIndex = 1;
 
-    double sumX = 0.0;
-    double sumX2 = 0.0;
-    double sumY = 0.0;
-    double sumY2 = 0.0;
-    double sumXY = 0.0;
-    int count = 0;
-
-    // No, pref inferrers and transforms don't appy here. I think.
-
+    int intersectionSize = 0;
     while (true) {
       int compare = xIndex.compareTo(yIndex);
       if (compare == 0) {
-        // Both users expressed a preference for the item
-        double x = xPref.getValue();
-        double y = yPref.getValue();
-        sumXY += x * y;
-        sumX += x;
-        sumX2 += x * x;
-        sumY += y;
-        sumY2 += y * y;
-        count++;
+        intersectionSize++;
       }
       if (compare <= 0) {
         if (xPrefIndex == xPrefs.length) {
@@ -326,64 +156,23 @@
       }
     }
 
-    // See comments above on these computations
-    double n = (double) count;
-    double meanX = sumX / n;
-    double meanY = sumY / n;
-    double centeredSumXY = sumXY - meanY * sumX - meanX * sumY + n * meanX * meanY;
-    double centeredSumX2 = sumX2 - 2.0 * meanX * sumX + n * meanX * meanX;
-    double centeredSumY2 = sumY2 - 2.0 * meanY * sumY + n * meanY * meanY;
+    int unionSize = xPrefs.length + yPrefs.length - intersectionSize;
 
-    double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2);
-
-    if (correlationTransform != null) {
-      result = correlationTransform.transformCorrelation(item1, item2, result);
-    }
-
-    if (!Double.isNaN(result)) {
-      result = normalizeWeightResult(result, count, dataModel.getNumUsers());
-    }
+    double result = (double) intersectionSize / (double) unionSize;
 
     if (log.isTraceEnabled()) {
-      log.trace("UserCorrelation between " + item1 + " and " + item2 + " is " + result);
-    }
-    return result;
-  }
-
-  private double normalizeWeightResult(double result, int count, int num) {
-    if (weighted) {
-      double scaleFactor = 1.0 - (double) count / (double) (num + 1);
-      if (result < 0.0) {
-        result = -1.0 + scaleFactor * (1.0 + result);
-      } else {
-        result = 1.0 - scaleFactor * (1.0 - result);
-      }
-    }
-    // Make sure the result is not accidentally a little outside [-1.0, 1.0] due to rounding:
-    if (result < -1.0) {
-      result = -1.0;
-    } else if (result > 1.0) {
-      result = 1.0;
+      log.trace("Item correlation between " + item1 + " and " + item2 + " is " + result);
     }
     return result;
   }
 
   public void refresh() {
     dataModel.refresh();
-    if (inferrer != null) {
-      inferrer.refresh();
-    }
-    if (prefTransform != null) {
-      prefTransform.refresh();
-    }
-    if (correlationTransform != null) {
-      correlationTransform.refresh();
-    }
   }
 
   @Override
-  public String toString() {
-    return "PearsonCorrelation[dataModel:" + dataModel + ",inferrer:" + inferrer + ']';
+  public final String toString() {
+    return "TanimotoCoefficientCorrelation[dataModel:" + dataModel + ']';
   }
 
-}
+}
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java?rev=661892&r1=661891&r2=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java Fri May 30 15:06:08 2008
@@ -44,11 +44,13 @@
     RandomUtils.useTestSeed();
   }
 
-  public static User getUser(String userID, double... values) {
+  public static User getUser(String userID, Double... values) {
     List<Preference> prefs = new ArrayList<Preference>(values.length);
     int i = 0;
-    for (double value : values) {
-      prefs.add(new GenericPreference(null, new GenericItem<String>(String.valueOf(i)), value));
+    for (Double value : values) {
+      if (value != null) {
+        prefs.add(new GenericPreference(null, new GenericItem<String>(String.valueOf(i)), value));
+      }
       i++;
     }
     return new GenericUser<String>(userID, prefs);

Copied: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java (from r657697, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java?p2=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java&p1=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/EuclideanDistanceCorrelationTest.java Fri May 30 15:06:08 2008
@@ -28,15 +28,15 @@
 import java.util.Collections;
 
 /**
- * <p>Tests {@link PearsonCorrelation}.</p>
+ * <p>Tests {@link EuclideanDistanceCorrelation}.</p>
  */
-public final class PearsonCorrelationTest extends CorrelationTestCase {
+public final class EuclideanDistanceCorrelationTest extends CorrelationTestCase {
 
   public void testFullCorrelation1() throws Exception {
     User user1 = getUser("test1", 3.0, -2.0);
     User user2 = getUser("test2", 3.0, -2.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
+    double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
     assertCorrelationEquals(1.0, correlation);
   }
 
@@ -44,7 +44,7 @@
     User user1 = getUser("test1", 3.0, -2.0);
     User user2 = getUser("test2", 3.0, -2.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
+    double correlation = new EuclideanDistanceCorrelation(dataModel, true).userCorrelation(user1, user2);
     assertCorrelationEquals(1.0, correlation);
   }
 
@@ -52,8 +52,7 @@
     User user1 = getUser("test1", 3.0, 3.0);
     User user2 = getUser("test2", 3.0, 3.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    // Yeah, undefined in this case
+    double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
     assertTrue(Double.isNaN(correlation));
   }
 
@@ -61,16 +60,16 @@
     User user1 = getUser("test1", 3.0, -2.0);
     User user2 = getUser("test2", -3.0, 2.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(-1.0, correlation);
+    double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
+    assertCorrelationEquals(0.424465381883345, correlation);
   }
 
   public void testNoCorrelation1Weighted() throws Exception {
     User user1 = getUser("test1", 3.0, -2.0);
     User user2 = getUser("test2", -3.0, 2.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
-    assertCorrelationEquals(-1.0, correlation);
+    double correlation = new EuclideanDistanceCorrelation(dataModel, true).userCorrelation(user1, user2);
+    assertCorrelationEquals(0.8081551272944483, correlation);
   }
 
   public void testNoCorrelation2() throws Exception {
@@ -79,7 +78,7 @@
     Preference pref2 = new GenericPreference(null, new GenericItem<String>("2"), 1.0);
     GenericUser<String> user2 = new GenericUser<String>("test2", Collections.singletonList(pref2));
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
+    double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
     assertTrue(Double.isNaN(correlation));
   }
 
@@ -87,24 +86,24 @@
     User user1 = getUser("test1", 90.0, 80.0, 70.0);
     User user2 = getUser("test2", 70.0, 80.0, 90.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(-1.0, correlation);
+    double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
+    assertCorrelationEquals(0.3606507916004517, correlation);
   }
 
   public void testSimple() throws Exception {
     User user1 = getUser("test1", 1.0, 2.0, 3.0);
     User user2 = getUser("test2", 2.0, 5.0, 6.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(0.9607689228305227, correlation);
+    double correlation = new EuclideanDistanceCorrelation(dataModel).userCorrelation(user1, user2);
+    assertCorrelationEquals(0.5896248568217328, correlation);
   }
 
   public void testSimpleWeighted() throws Exception {
     User user1 = getUser("test1", 1.0, 2.0, 3.0);
     User user2 = getUser("test2", 2.0, 5.0, 6.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
-    assertCorrelationEquals(0.9901922307076306, correlation);
+    double correlation = new EuclideanDistanceCorrelation(dataModel, true).userCorrelation(user1, user2);
+    assertCorrelationEquals(0.8974062142054332, correlation);
   }
 
   public void testFullItemCorrelation1() throws Exception {
@@ -112,7 +111,7 @@
     User user2 = getUser("test2", -2.0, -2.0);
     DataModel dataModel = getDataModel(user1, user2);
     double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+            new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
     assertCorrelationEquals(1.0, correlation);
   }
 
@@ -121,7 +120,7 @@
     User user2 = getUser("test2", 3.0, 3.0);
     DataModel dataModel = getDataModel(user1, user2);
     double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+            new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
     // Yeah, undefined in this case
     assertTrue(Double.isNaN(correlation));
   }
@@ -131,8 +130,8 @@
     User user2 = getUser("test2", -2.0, 2.0);
     DataModel dataModel = getDataModel(user1, user2);
     double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    assertCorrelationEquals(-1.0, correlation);
+            new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+    assertCorrelationEquals(0.424465381883345, correlation);
   }
 
   public void testNoItemCorrelation2() throws Exception {
@@ -142,7 +141,7 @@
     GenericUser<String> user2 = new GenericUser<String>("test2", Collections.singletonList(pref2));
     DataModel dataModel = getDataModel(user1, user2);
     double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("1"), dataModel.getItem("2"));
+            new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("1"), dataModel.getItem("2"));
     assertTrue(Double.isNaN(correlation));
   }
 
@@ -152,8 +151,8 @@
     User user3 = getUser("test3", 70.0, 90.0);
     DataModel dataModel = getDataModel(user1, user2, user3);
     double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    assertCorrelationEquals(-1.0, correlation);
+            new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+    assertCorrelationEquals(0.3606507916004517, correlation);
   }
 
   public void testSimpleItem() throws Exception {
@@ -162,8 +161,8 @@
     User user3 = getUser("test3", 3.0, 6.0);
     DataModel dataModel = getDataModel(user1, user2, user3);
     double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    assertCorrelationEquals(0.9607689228305227, correlation);
+            new EuclideanDistanceCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
+    assertCorrelationEquals(0.5896248568217328, correlation);
   }
 
   public void testSimpleItemWeighted() throws Exception {
@@ -171,14 +170,14 @@
     User user2 = getUser("test2", 2.0, 5.0);
     User user3 = getUser("test3", 3.0, 6.0);
     DataModel dataModel = getDataModel(user1, user2, user3);
-    ItemCorrelation itemCorrelation = new PearsonCorrelation(dataModel, true);
+    ItemCorrelation itemCorrelation = new EuclideanDistanceCorrelation(dataModel, true);
     double correlation = itemCorrelation.itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    assertCorrelationEquals(0.9901922307076306, correlation);
+    assertCorrelationEquals(0.8974062142054332, correlation);
   }
 
   public void testRefresh() {
     // Make sure this doesn't throw an exception
-    new PearsonCorrelation(getDataModel()).refresh();
+    new EuclideanDistanceCorrelation(getDataModel()).refresh();
   }
 
-}
+}
\ No newline at end of file

Copied: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java (from r657697, lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java?p2=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java&p1=lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java&r1=657697&r2=661892&rev=661892&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/PearsonCorrelationTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/correlation/TanimotoCoefficientCorrelationTest.java Fri May 30 15:06:08 2008
@@ -17,168 +17,73 @@
 
 package org.apache.mahout.cf.taste.impl.correlation;
 
-import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
-import org.apache.mahout.cf.taste.impl.model.GenericItem;
-import org.apache.mahout.cf.taste.impl.model.GenericPreference;
-import org.apache.mahout.cf.taste.impl.model.GenericUser;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.User;
 
-import java.util.Collections;
-
 /**
- * <p>Tests {@link PearsonCorrelation}.</p>
+ * <p>Tests {@link TanimotoCoefficientCorrelation}.</p>
  */
-public final class PearsonCorrelationTest extends CorrelationTestCase {
-
-  public void testFullCorrelation1() throws Exception {
-    User user1 = getUser("test1", 3.0, -2.0);
-    User user2 = getUser("test2", 3.0, -2.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(1.0, correlation);
-  }
-
-  public void testFullCorrelation1Weighted() throws Exception {
-    User user1 = getUser("test1", 3.0, -2.0);
-    User user2 = getUser("test2", 3.0, -2.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
-    assertCorrelationEquals(1.0, correlation);
-  }
-
-  public void testFullCorrelation2() throws Exception {
-    User user1 = getUser("test1", 3.0, 3.0);
-    User user2 = getUser("test2", 3.0, 3.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    // Yeah, undefined in this case
-    assertTrue(Double.isNaN(correlation));
-  }
+public final class TanimotoCoefficientCorrelationTest extends CorrelationTestCase {
 
   public void testNoCorrelation1() throws Exception {
-    User user1 = getUser("test1", 3.0, -2.0);
-    User user2 = getUser("test2", -3.0, 2.0);
+    User user1 = getUser("test1");
+    User user2 = getUser("test2");
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(-1.0, correlation);
-  }
-
-  public void testNoCorrelation1Weighted() throws Exception {
-    User user1 = getUser("test1", 3.0, -2.0);
-    User user2 = getUser("test2", -3.0, 2.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
-    assertCorrelationEquals(-1.0, correlation);
+    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+    assertTrue(Double.isNaN(correlation));
   }
 
   public void testNoCorrelation2() throws Exception {
-    Preference pref1 = new GenericPreference(null, new GenericItem<String>("1"), 1.0);
-    GenericUser<String> user1 = new GenericUser<String>("test1", Collections.singletonList(pref1));
-    Preference pref2 = new GenericPreference(null, new GenericItem<String>("2"), 1.0);
-    GenericUser<String> user2 = new GenericUser<String>("test2", Collections.singletonList(pref2));
+    User user1 = getUser("test1");
+    User user2 = getUser("test2", 1.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    assertTrue(Double.isNaN(correlation));
+    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+    assertCorrelationEquals(0.0, correlation);
   }
 
-  public void testNoCorrelation3() throws Exception {
-    User user1 = getUser("test1", 90.0, 80.0, 70.0);
-    User user2 = getUser("test2", 70.0, 80.0, 90.0);
+  public void testNoCorrelation() throws Exception {
+    User user1 = getUser("test1", null, 2.0, 3.0);
+    User user2 = getUser("test2", 1.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(-1.0, correlation);
+    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+    assertCorrelationEquals(0.0, correlation);
   }
 
-  public void testSimple() throws Exception {
-    User user1 = getUser("test1", 1.0, 2.0, 3.0);
-    User user2 = getUser("test2", 2.0, 5.0, 6.0);
+  public void testFullCorrelation1() throws Exception {
+    User user1 = getUser("test1", 1.0);
+    User user2 = getUser("test2", 1.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel).userCorrelation(user1, user2);
-    assertCorrelationEquals(0.9607689228305227, correlation);
+    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+    assertCorrelationEquals(1.0, correlation);
   }
 
-  public void testSimpleWeighted() throws Exception {
+  public void testFullCorrelation2() throws Exception {
     User user1 = getUser("test1", 1.0, 2.0, 3.0);
-    User user2 = getUser("test2", 2.0, 5.0, 6.0);
+    User user2 = getUser("test2", 1.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation = new PearsonCorrelation(dataModel, true).userCorrelation(user1, user2);
-    assertCorrelationEquals(0.9901922307076306, correlation);
+    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+    assertCorrelationEquals(0.3333333333333333, correlation);
   }
 
-  public void testFullItemCorrelation1() throws Exception {
-    User user1 = getUser("test1", 3.0, 3.0);
-    User user2 = getUser("test2", -2.0, -2.0);
+  public void testCorrelation1() throws Exception {
+    User user1 = getUser("test1", null, 2.0, 3.0);
+    User user2 = getUser("test2", 1.0, 1.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    assertCorrelationEquals(1.0, correlation);
-  }
-
-  public void testFullItemCorrelation2() throws Exception {
-    User user1 = getUser("test1", 3.0, 3.0);
-    User user2 = getUser("test2", 3.0, 3.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    // Yeah, undefined in this case
-    assertTrue(Double.isNaN(correlation));
+    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+    assertEquals(0.3333333333333333, correlation);
   }
 
-  public void testNoItemCorrelation1() throws Exception {
-    User user1 = getUser("test1", 3.0, -3.0);
-    User user2 = getUser("test2", -2.0, 2.0);
-    DataModel dataModel = getDataModel(user1, user2);
-    double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    assertCorrelationEquals(-1.0, correlation);
-  }
-
-  public void testNoItemCorrelation2() throws Exception {
-    Preference pref1 = new GenericPreference(null, new GenericItem<String>("1"), 1.0);
-    GenericUser<String> user1 = new GenericUser<String>("test1", Collections.singletonList(pref1));
-    Preference pref2 = new GenericPreference(null, new GenericItem<String>("2"), 1.0);
-    GenericUser<String> user2 = new GenericUser<String>("test2", Collections.singletonList(pref2));
+  public void testCorrelation2() throws Exception {
+    User user1 = getUser("test1", null, 2.0, 3.0, 1.0);
+    User user2 = getUser("test2", 1.0, 1.0, null, 0.0);
     DataModel dataModel = getDataModel(user1, user2);
-    double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("1"), dataModel.getItem("2"));
-    assertTrue(Double.isNaN(correlation));
-  }
-
-  public void testNoItemCorrelation3() throws Exception {
-    User user1 = getUser("test1", 90.0, 70.0);
-    User user2 = getUser("test2", 80.0, 80.0);
-    User user3 = getUser("test3", 70.0, 90.0);
-    DataModel dataModel = getDataModel(user1, user2, user3);
-    double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    assertCorrelationEquals(-1.0, correlation);
-  }
-
-  public void testSimpleItem() throws Exception {
-    User user1 = getUser("test1", 1.0, 2.0);
-    User user2 = getUser("test2", 2.0, 5.0);
-    User user3 = getUser("test3", 3.0, 6.0);
-    DataModel dataModel = getDataModel(user1, user2, user3);
-    double correlation =
-            new PearsonCorrelation(dataModel).itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    assertCorrelationEquals(0.9607689228305227, correlation);
-  }
-
-  public void testSimpleItemWeighted() throws Exception {
-    User user1 = getUser("test1", 1.0, 2.0);
-    User user2 = getUser("test2", 2.0, 5.0);
-    User user3 = getUser("test3", 3.0, 6.0);
-    DataModel dataModel = getDataModel(user1, user2, user3);
-    ItemCorrelation itemCorrelation = new PearsonCorrelation(dataModel, true);
-    double correlation = itemCorrelation.itemCorrelation(dataModel.getItem("0"), dataModel.getItem("1"));
-    assertCorrelationEquals(0.9901922307076306, correlation);
+    double correlation = new TanimotoCoefficientCorrelation(dataModel).userCorrelation(user1, user2);
+    assertEquals(0.5, correlation);
   }
 
   public void testRefresh() {
     // Make sure this doesn't throw an exception
-    new PearsonCorrelation(getDataModel()).refresh();
+    new TanimotoCoefficientCorrelation(getDataModel()).refresh();
   }
 
-}
+}
\ No newline at end of file