You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/04/26 16:49:38 UTC

svn commit: r938070 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java

Author: srowen
Date: Mon Apr 26 14:49:37 2010
New Revision: 938070

URL: http://svn.apache.org/viewvc?rev=938070&view=rev
Log:
Remove normalization by vector length, since it was possibly confusing and not necessarily desirable in all cases. The simplified version is comparably effective.

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java?rev=938070&r1=938069&r2=938070&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java Mon Apr 26 14:49:37 2010
@@ -54,17 +54,8 @@ public final class EuclideanDistanceSimi
   
   @Override
   double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) {
-    if (n == 0) {
-      return Double.NaN;
-    }
-    double denominator = Math.sqrt(sumX2) + Math.sqrt(sumY2);
-    if (denominator == 0.0) {
-      return Double.NaN;
-    }
-    // normalize a bit for magnitude
-    sumXYdiff2 /= denominator;
-    // divide by n below to not automatically give users with more overlap more similarity
-    return 1.0 / (1.0 + Math.sqrt(sumXYdiff2) / n);
+    // divide denominator by n below to not automatically give users with more overlap more similarity
+    return n / (1.0 + Math.sqrt(sumXYdiff2));
   }
   
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java?rev=938070&r1=938069&r2=938070&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java Mon Apr 26 14:49:37 2010
@@ -55,7 +55,7 @@ public final class EuclideanDistanceSimi
                     {3.0, 3.0},
             });
     double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2);
-    assertTrue(Double.isNaN(correlation));
+    assertEquals(1.0, correlation);
   }
 
   public void testNoCorrelation1() throws Exception {
@@ -66,7 +66,7 @@ public final class EuclideanDistanceSimi
                     {-3.0, 2.0},
             });
     double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2);
-    assertCorrelationEquals(0.424465381883345, correlation);
+    assertCorrelationEquals(0.24357264905599915, correlation);
   }
 
   public void testNoCorrelation1Weighted() throws Exception {
@@ -77,7 +77,7 @@ public final class EuclideanDistanceSimi
                     {-3.0, 2.0},
             });
     double correlation = new EuclideanDistanceSimilarity(dataModel, Weighting.WEIGHTED).userSimilarity(1, 2);
-    assertCorrelationEquals(0.8081551272944483, correlation);
+    assertCorrelationEquals(0.747857549685333, correlation);
   }
 
   public void testNoCorrelation2() throws Exception {
@@ -88,7 +88,7 @@ public final class EuclideanDistanceSimi
                     {null, null, 1.0},
             });
     double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2);
-    assertTrue(Double.isNaN(correlation));
+    assertEquals(0.0, correlation);
   }
 
   public void testNoCorrelation3() throws Exception {
@@ -99,7 +99,7 @@ public final class EuclideanDistanceSimi
                     {70.0, 80.0, 90.0},
             });
     double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2);
-    assertCorrelationEquals(0.3606507916004517, correlation);
+    assertCorrelationEquals(0.10244407226831752, correlation);
   }
 
   public void testSimple() throws Exception {
@@ -110,7 +110,7 @@ public final class EuclideanDistanceSimi
                     {2.0, 5.0, 6.0},
             });
     double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2);
-    assertCorrelationEquals(0.5896248568217328, correlation);
+    assertCorrelationEquals(0.5598164905901122, correlation);
   }
 
   public void testSimpleWeighted() throws Exception {
@@ -121,7 +121,7 @@ public final class EuclideanDistanceSimi
                     {2.0, 5.0, 6.0},
             });
     double correlation = new EuclideanDistanceSimilarity(dataModel, Weighting.WEIGHTED).userSimilarity(1, 2);
-    assertCorrelationEquals(0.8974062142054332, correlation);
+    assertCorrelationEquals(0.889954122647528, correlation);
   }
 
   public void testFullItemCorrelation1() throws Exception {
@@ -145,8 +145,7 @@ public final class EuclideanDistanceSimi
             });
     double correlation =
         new EuclideanDistanceSimilarity(dataModel).itemSimilarity(0, 1);
-    // Yeah, undefined in this case
-    assertTrue(Double.isNaN(correlation));
+    assertEquals(1.0, correlation);
   }
 
   public void testNoItemCorrelation1() throws Exception {
@@ -158,7 +157,7 @@ public final class EuclideanDistanceSimi
             });
     double correlation =
         new EuclideanDistanceSimilarity(dataModel).itemSimilarity(0, 1);
-    assertCorrelationEquals(0.424465381883345, correlation);
+    assertCorrelationEquals(0.24357264905599915, correlation);
   }
 
   public void testNoItemCorrelation2() throws Exception {
@@ -168,9 +167,8 @@ public final class EuclideanDistanceSimi
                     {null, 1.0, null},
                     {null, null, 1.0},
             });
-    double correlation =
-        new EuclideanDistanceSimilarity(dataModel).itemSimilarity(1, 2);
-    assertTrue(Double.isNaN(correlation));
+    double correlation = new EuclideanDistanceSimilarity(dataModel).itemSimilarity(1, 2);
+    assertEquals(0.0, correlation);
   }
 
   public void testNoItemCorrelation3() throws Exception {
@@ -183,7 +181,7 @@ public final class EuclideanDistanceSimi
             });
     double correlation =
         new EuclideanDistanceSimilarity(dataModel).itemSimilarity(0, 1);
-    assertCorrelationEquals(0.3606507916004517, correlation);
+    assertCorrelationEquals(0.10244407226831752, correlation);
   }
 
   public void testSimpleItem() throws Exception {
@@ -196,7 +194,7 @@ public final class EuclideanDistanceSimi
             });
     double correlation =
         new EuclideanDistanceSimilarity(dataModel).itemSimilarity(0, 1);
-    assertCorrelationEquals(0.5896248568217328, correlation);
+    assertCorrelationEquals(0.5598164905901122, correlation);
   }
 
   public void testSimpleItemWeighted() throws Exception {
@@ -209,7 +207,7 @@ public final class EuclideanDistanceSimi
             });
     ItemSimilarity itemSimilarity = new EuclideanDistanceSimilarity(dataModel, Weighting.WEIGHTED);
     double correlation = itemSimilarity.itemSimilarity(0, 1);
-    assertCorrelationEquals(0.8974062142054332, correlation);
+    assertCorrelationEquals(0.889954122647528, correlation);
   }
 
   public void testRefresh() throws TasteException {