You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/10/01 15:30:24 UTC
svn commit: r1177975 - in
/mahout/trunk/math/src/main/java/org/apache/mahout/math:
AbstractVector.java DenseVector.java RandomAccessSparseVector.java
SequentialAccessSparseVector.java VectorView.java
Author: srowen
Date: Sat Oct 1 13:30:24 2011
New Revision: 1177975
URL: http://svn.apache.org/viewvc?rev=1177975&view=rev
Log:
MAHOUT-823 standardize, optimize vector dot product
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java Sat Oct 1 13:30:24 2011
@@ -26,6 +26,8 @@ import java.util.Iterator;
/** Implementations of generic capabilities like sum of elements and dot products */
public abstract class AbstractVector implements Vector {
+
+ private static final double LOG2 = Math.log(2.0);
private int size;
protected double lengthSquared = -1.0;
@@ -112,6 +114,39 @@ public abstract class AbstractVector imp
if (this == x) {
return dotSelf();
}
+
+ // Crude rule of thumb: when a sequential-access vector, with O(log n) lookups, has about
+ // 2^n elements, its lookups take longer than a dense / random access vector (with O(1) lookups) by
+ // about a factor of (0.71n - 12.3). This holds pretty well from n=19 up to at least n=23 according to my tests;
+ // below that lookups are so fast that this difference is near zero.
+
+ int thisNumNonDefault = getNumNondefaultElements();
+ int thatNumNonDefault = x.getNumNondefaultElements();
+ // Default: dot from smaller vector to larger vector
+ boolean reverseDot = thatNumNonDefault < thisNumNonDefault;
+
+ // But, see if we should override that -- is exactly one of them sequential access and so slower to lookup in?
+ if (isSequentialAccess() != x.isSequentialAccess()) {
+ double log2ThisSize = Math.log(thisNumNonDefault) / LOG2;
+ double log2ThatSize = Math.log(thatNumNonDefault) / LOG2;
+ // Only override when the O(log n) factor seems big enough to care about:
+ if (log2ThisSize >= 19.0 && log2ThatSize >= 19.0) {
+ double dotCost = thisNumNonDefault;
+ if (x.isSequentialAccess()) {
+ dotCost *= 0.71 * log2ThatSize - 12.3;
+ }
+ double reverseDotCost = thatNumNonDefault;
+ if (isSequentialAccess()) {
+ reverseDotCost *= 0.71 * log2ThisSize - 12.3;
+ }
+ reverseDot = reverseDotCost < dotCost;
+ }
+ }
+
+ if (reverseDot) {
+ return x.dot(this);
+ }
+
double result = 0.0;
Iterator<Element> iter = iterateNonZero();
while (iter.hasNext()) {
@@ -191,7 +226,7 @@ public abstract class AbstractVector imp
Iterator<Element> iter = result.iterateNonZero();
while (iter.hasNext()) {
Element element = iter.next();
- element.set(Math.log(1 + element.get()) / denominator);
+ element.set(Math.log1p(element.get()) / denominator);
}
return result;
}
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java Sat Oct 1 13:30:24 2011
@@ -224,34 +224,6 @@ public class DenseVector extends Abstrac
}
}
-
- @Override
- public double dot(Vector x) {
- if (size() != x.size()) {
- throw new CardinalityException(size(), x.size());
- }
- if (this == x) {
- return dotSelf();
- }
-
- double result = 0;
- if (x instanceof DenseVector) {
- for (int i = 0; i < x.size(); i++) {
- result += this.values[i] * x.getQuick(i);
- }
- return result;
- } else {
- // Try to get the speed boost associated fast/normal seq access on x and quick lookup on this
- Iterator<Element> iter = x.iterateNonZero();
- while (iter.hasNext()) {
- Element element = iter.next();
- result += element.get() * this.values[element.index()];
- }
- return result;
- }
- }
-
-
private final class NonDefaultIterator extends AbstractIterator<Element> {
private final DenseElement element = new DenseElement();
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java Sat Oct 1 13:30:24 2011
@@ -168,46 +168,6 @@ public class RandomAccessSparseVector ex
return new AllIterator();
}
- @Override
- public double dot(Vector x) {
- if (size() != x.size()) {
- throw new CardinalityException(size(), x.size());
- }
- if (this == x) {
- return dotSelf();
- }
-
- double result = 0;
- if (x instanceof SequentialAccessSparseVector) {
- Iterator<Element> iter = x.iterateNonZero();
- while (iter.hasNext()) {
- Element element = iter.next();
- result += element.get() * getQuick(element.index());
- }
- return result;
- } else {
- Iterator<Element> iter = iterateNonZero();
- while (iter.hasNext()) {
- Element element = iter.next();
- result += element.get() * x.getQuick(element.index());
- }
- return result;
- }
- }
-
-
- private static final class AddToVector implements IntDoubleProcedure {
- private final Vector v;
- private AddToVector(Vector v) {
- this.v = v;
- }
- @Override
- public boolean apply(int key, double value) {
- v.set(key, value + v.get(key));
- return true;
- }
- }
-
private final class NonDefaultIterator extends AbstractIterator<Element> {
private final RandomAccessElement element = new RandomAccessElement();
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java Sat Oct 1 13:30:24 2011
@@ -211,56 +211,6 @@ public class SequentialAccessSparseVecto
}
@Override
- public double dot(Vector x) {
- if (size() != x.size()) {
- throw new CardinalityException(size(), x.size());
- }
- if (this == x) {
- return dotSelf();
- }
-
- if (x instanceof SequentialAccessSparseVector) {
- // For sparse SeqAccVectors. do dot product without lookup in a linear fashion
- Iterator<Element> myIter = iterateNonZero();
- Iterator<Element> otherIter = x.iterateNonZero();
- if (!myIter.hasNext() || !otherIter.hasNext()) {
- return 0.0;
- }
- Element myCurrent = myIter.next();
- Element otherCurrent = otherIter.next();
- double result = 0.0;
- while (true) {
- int myIndex = myCurrent.index();
- int otherIndex = otherCurrent.index();
- if (myIndex == otherIndex) {
- result += myCurrent.get() * otherCurrent.get();
- }
- if (myIndex <= otherIndex) {
- if (!myIter.hasNext()) {
- break;
- }
- myCurrent = myIter.next();
- }
- if (myIndex >= otherIndex) {
- if (!otherIter.hasNext()) {
- break;
- }
- otherCurrent = otherIter.next();
- }
- }
- return result;
- } else { // seq.rand. seq.dense
- double result = 0.0;
- Iterator<Element> iter = iterateNonZero();
- while (iter.hasNext()) {
- Element element = iter.next();
- result += element.get() * x.getQuick(element.index());
- }
- return result;
- }
- }
-
- @Override
public Vector minus(Vector that) {
if (size() != that.size()) {
throw new CardinalityException(size(), that.size());
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java?rev=1177975&r1=1177974&r2=1177975&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java Sat Oct 1 13:30:24 2011
@@ -178,18 +178,6 @@ public class VectorView extends Abstract
}
@Override
- public double dot(Vector x) {
- if (size() != x.size()) {
- throw new CardinalityException(size(), x.size());
- }
- double result = 0;
- for (int i = 0; i < size(); i++) {
- result += getQuick(i) * x.getQuick(i);
- }
- return result;
- }
-
- @Override
public double getLengthSquared() {
double result = 0.0;
int size = size();