You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/10/01 15:30:24 UTC

svn commit: r1177975 - in /mahout/trunk/math/src/main/java/org/apache/mahout/math: AbstractVector.java DenseVector.java RandomAccessSparseVector.java SequentialAccessSparseVector.java VectorView.java

Author: srowen
Date: Sat Oct  1 13:30:24 2011
New Revision: 1177975

URL: http://svn.apache.org/viewvc?rev=1177975&view=rev
Log:
MAHOUT-823 standardize, optimize vector dot product

Modified:
    mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java Sat Oct  1 13:30:24 2011
@@ -26,6 +26,8 @@ import java.util.Iterator;
 
 /** Implementations of generic capabilities like sum of elements and dot products */
 public abstract class AbstractVector implements Vector {
+  
+  private static final double LOG2 = Math.log(2.0);
 
   private int size;
   protected double lengthSquared = -1.0;
@@ -112,6 +114,39 @@ public abstract class AbstractVector imp
     if (this == x) {
       return dotSelf();
     }
+
+    // Crude rule of thumb: when a sequential-access vector, with O(log n) lookups, has about
+    // 2^n elements, its lookups take longer than a dense / random access vector (with O(1) lookups) by
+    // about a factor of (0.71n - 12.3). This holds pretty well from n=19 up to at least n=23 according to my tests;
+    // below that lookups are so fast that this difference is near zero.
+
+    int thisNumNonDefault = getNumNondefaultElements();
+    int thatNumNonDefault = x.getNumNondefaultElements();
+    // Default: dot from smaller vector to larger vector
+    boolean reverseDot = thatNumNonDefault < thisNumNonDefault;
+
+    // But, see if we should override that -- is exactly one of them sequential access and so slower to lookup in?
+    if (isSequentialAccess() != x.isSequentialAccess()) {
+      double log2ThisSize = Math.log(thisNumNonDefault) / LOG2;
+      double log2ThatSize = Math.log(thatNumNonDefault) / LOG2;
+      // Only override when the O(log n) factor seems big enough to care about:
+      if (log2ThisSize >= 19.0 && log2ThatSize >= 19.0) {
+        double dotCost = thisNumNonDefault;
+        if (x.isSequentialAccess()) {
+          dotCost *= 0.71 * log2ThatSize - 12.3;
+        }
+        double reverseDotCost = thatNumNonDefault;
+        if (isSequentialAccess()) {
+          reverseDotCost *= 0.71 * log2ThisSize - 12.3;
+        }
+        reverseDot = reverseDotCost < dotCost;
+      }
+    }
+
+    if (reverseDot) {
+      return x.dot(this);
+    }
+
     double result = 0.0;
     Iterator<Element> iter = iterateNonZero();
     while (iter.hasNext()) {
@@ -191,7 +226,7 @@ public abstract class AbstractVector imp
       Iterator<Element> iter = result.iterateNonZero();
       while (iter.hasNext()) {
         Element element = iter.next();
-        element.set(Math.log(1 + element.get()) / denominator);
+        element.set(Math.log1p(element.get()) / denominator);
       }
       return result;
     }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java Sat Oct  1 13:30:24 2011
@@ -224,34 +224,6 @@ public class DenseVector extends Abstrac
     }
   }
 
-  
-  @Override
-  public double dot(Vector x) {
-    if (size() != x.size()) {
-      throw new CardinalityException(size(), x.size());
-    }
-    if (this == x) {
-      return dotSelf();
-    }
-    
-    double result = 0;
-    if (x instanceof DenseVector) {
-      for (int i = 0; i < x.size(); i++) {
-        result += this.values[i] * x.getQuick(i);
-      }
-      return result;
-    } else {
-      // Try to get the speed boost associated fast/normal seq access on x and quick lookup on this
-      Iterator<Element> iter = x.iterateNonZero();
-      while (iter.hasNext()) {
-        Element element = iter.next();
-        result += element.get() * this.values[element.index()];
-      }
-      return result;
-    }
-  }
-
-
   private final class NonDefaultIterator extends AbstractIterator<Element> {
 
     private final DenseElement element = new DenseElement();

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java Sat Oct  1 13:30:24 2011
@@ -168,46 +168,6 @@ public class RandomAccessSparseVector ex
     return new AllIterator();
   }
 
-  @Override
-  public double dot(Vector x) {
-    if (size() != x.size()) {
-      throw new CardinalityException(size(), x.size());
-    }
-    if (this == x) {
-      return dotSelf();
-    }
-    
-    double result = 0;
-    if (x instanceof SequentialAccessSparseVector) {
-      Iterator<Element> iter = x.iterateNonZero();
-      while (iter.hasNext()) {
-        Element element = iter.next();
-        result += element.get() * getQuick(element.index());
-      }
-      return result;
-    } else { 
-      Iterator<Element> iter = iterateNonZero();
-      while (iter.hasNext()) {
-        Element element = iter.next();
-        result += element.get() * x.getQuick(element.index());
-      }
-      return result;
-    }
-  }
-
-
-  private static final class AddToVector implements IntDoubleProcedure {
-    private final Vector v;
-    private AddToVector(Vector v) {
-      this.v = v;
-    }
-    @Override
-    public boolean apply(int key, double value) {
-      v.set(key, value + v.get(key));
-      return true;
-    }
-  }
-
   private final class NonDefaultIterator extends AbstractIterator<Element> {
 
     private final RandomAccessElement element = new RandomAccessElement();

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java Sat Oct  1 13:30:24 2011
@@ -211,56 +211,6 @@ public class SequentialAccessSparseVecto
   }
 
   @Override
-  public double dot(Vector x) {
-    if (size() != x.size()) {
-      throw new CardinalityException(size(), x.size());
-    }
-    if (this == x) {
-      return dotSelf();
-    }
-
-    if (x instanceof SequentialAccessSparseVector) {
-      // For sparse SeqAccVectors. do dot product without lookup in a linear fashion
-      Iterator<Element> myIter = iterateNonZero();
-      Iterator<Element> otherIter = x.iterateNonZero();
-      if (!myIter.hasNext() || !otherIter.hasNext()) {
-        return 0.0;
-      }
-      Element myCurrent = myIter.next();
-      Element otherCurrent = otherIter.next();
-      double result = 0.0;
-      while (true) {
-        int myIndex = myCurrent.index();
-        int otherIndex = otherCurrent.index();
-        if (myIndex == otherIndex) {
-          result += myCurrent.get() * otherCurrent.get();
-        }
-        if (myIndex <= otherIndex) {
-          if (!myIter.hasNext()) {
-            break;
-          }
-          myCurrent = myIter.next();
-        }
-        if (myIndex >= otherIndex) {
-          if (!otherIter.hasNext()) {
-            break;
-          }
-          otherCurrent = otherIter.next();
-        }
-      }
-      return result;
-    } else { // seq.rand. seq.dense
-      double result = 0.0;
-      Iterator<Element> iter = iterateNonZero();
-      while (iter.hasNext()) {
-        Element element = iter.next();
-        result += element.get() * x.getQuick(element.index());
-      }
-      return result;
-    }
-  }
-
-  @Override
   public Vector minus(Vector that) {
     if (size() != that.size()) {
       throw new CardinalityException(size(), that.size());

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java Sat Oct  1 13:30:24 2011
@@ -178,18 +178,6 @@ public class VectorView extends Abstract
   }
 
   @Override
-  public double dot(Vector x) {
-    if (size() != x.size()) {
-      throw new CardinalityException(size(), x.size());
-    }
-    double result = 0;
-    for (int i = 0; i < size(); i++) {
-      result += getQuick(i) * x.getQuick(i);
-    }
-    return result;
-  }
-
-  @Override
   public double getLengthSquared() {
     double result = 0.0;
     int size = size();