You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/22 15:38:49 UTC

svn commit: r912585 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance: CosineDistanceMeasure.java ManhattanDistanceMeasure.java SquaredEuclideanDistanceMeasure.java TanimotoDistanceMeasure.java

Author: robinanil
Date: Mon Feb 22 14:38:48 2010
New Revision: 912585

URL: http://svn.apache.org/viewvc?rev=912585&view=rev
Log:
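Distance Measure improvements:
- CosineDistanceMeasure: use Vector.getLengthSquared() instead of summing squares by hand over iterateNonZero().
- ManhattanDistanceMeasure: comment reformatting only.
- SquaredEuclideanDistanceMeasure: call v2.getDistanceSquared(v1) instead of v1.getDistanceSquared(v2).
- TanimotoDistanceMeasure: when no weights are set, use Vector.dot() and getLengthSquared() directly; the weighted dot() helper no longer null-checks weights.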

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java?rev=912585&r1=912584&r2=912585&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java Mon Feb 22 14:38:48 2010
@@ -19,7 +19,6 @@
 
 import java.util.Collection;
 import java.util.Collections;
-import java.util.Iterator;
 
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.mahout.common.parameters.Parameter;
@@ -71,18 +70,8 @@
     if (v1.size() != v2.size()) {
       throw new CardinalityException();
     }
-    double lengthSquaredv1 = 0.0;
-    Iterator<Vector.Element> iter = v1.iterateNonZero();
-    while (iter.hasNext()) {
-      Vector.Element elt = iter.next();
-      lengthSquaredv1 += elt.get() * elt.get();
-    }
-    iter = v2.iterateNonZero();
-    double lengthSquaredv2 = 0.0;
-    while (iter.hasNext()) {
-      Vector.Element elt = iter.next();
-      lengthSquaredv2 += elt.get() * elt.get();
-    }
+    double lengthSquaredv1 = v1.getLengthSquared();
+    double lengthSquaredv2 = v2.getLengthSquared();
     
     double dotProduct = v1.dot(v2);
     double denominator = Math.sqrt(lengthSquaredv1) * Math.sqrt(lengthSquaredv2);
@@ -97,12 +86,8 @@
   
   @Override
   public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
-    Iterator<Vector.Element> iter = v.iterateNonZero();
-    double lengthSquaredv = 0.0;
-    while (iter.hasNext()) {
-      Vector.Element elt = iter.next();
-      lengthSquaredv += elt.get() * elt.get();
-    }
+
+    double lengthSquaredv =  v.getLengthSquared();
     
     double dotProduct = centroid.dot(v);
     double denominator = Math.sqrt(centroidLengthSquare) * Math.sqrt(lengthSquaredv);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java?rev=912585&r1=912584&r2=912585&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java Mon Feb 22 14:38:48 2010
@@ -62,9 +62,8 @@
     }
     double result = 0;
     Vector vector = v1.minus(v2);
-    Iterator<Vector.Element> iter = vector.iterateNonZero(); // this contains all non zero elements between
-                                                             // the
-    // two
+    Iterator<Vector.Element> iter = vector.iterateNonZero(); 
+    // iter walks the non-zero elements of the difference vector (v1 - v2)
     while (iter.hasNext()) {
       Vector.Element e = iter.next();
       result += Math.abs(e.get());

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java?rev=912585&r1=912584&r2=912585&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java Mon Feb 22 14:38:48 2010
@@ -49,7 +49,7 @@
   
   @Override
   public double distance(Vector v1, Vector v2) {
-    return v1.getDistanceSquared(v2);
+    return v2.getDistanceSquared(v1);
   }
   
   @Override

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java?rev=912585&r1=912584&r2=912585&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java Mon Feb 22 14:38:48 2010
@@ -39,8 +39,16 @@
    */
   @Override
   public double distance(Vector a, Vector b) {
-    double ab = dot(a, b);
-    double denominator = dot(a, a) + dot(b, b) - ab;
+    double ab = 0;
+    double denominator = 0;
+    if (getWeights() != null) {
+      ab = dot(b, a); // b is SequentialAccess
+      denominator = dot(a, a) + dot(b, b) - ab;
+    } else {
+      ab = b.dot(a); // b is SequentialAccess
+      denominator = a.getLengthSquared() + b.getLengthSquared() - ab;
+    }
+    
     if (denominator < ab) { // correct for fp round-off: distance >= 0
       denominator = ab;
     }
@@ -61,9 +69,7 @@
     while (it.hasNext() && (el = it.next()) != null) {
       double elementValue = el.get();
       double value = elementValue * (sameVector ? elementValue : b.getQuick(el.index()));
-      if (weights != null) {
-        value *= weights.getQuick(el.index());
-      }
+      value *= weights.getQuick(el.index());  
       dot += value;
     }
     return dot;