You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by jm...@apache.org on 2010/03/06 05:59:58 UTC
svn commit: r919701 - in /lucene/mahout/trunk/math/src:
main/java/org/apache/mahout/math/decomposer/lanczos/
test/java/org/apache/mahout/math/decomposer/
test/java/org/apache/mahout/math/decomposer/hebbian/
test/java/org/apache/mahout/math/decomposer/l...
Author: jmannix
Date: Sat Mar 6 04:59:57 2010
New Revision: 919701
URL: http://svn.apache.org/viewvc?rev=919701&view=rev
Log:
Extend LanczosSolver to take symmetric matrices as well as rectangular ones (with tests included). Partially fixes MAHOUT-310 (for the non-hadoop case).
Modified:
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java?rev=919701&r1=919700&r2=919701&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java Sat Mar 6 04:59:57 2010
@@ -96,6 +96,14 @@
int desiredRank,
Matrix eigenVectors,
List<Double> eigenValues) {
+ solve(corpus, desiredRank, eigenVectors, eigenValues, false);
+ }
+
+ public void solve(VectorIterable corpus,
+ int desiredRank,
+ Matrix eigenVectors,
+ List<Double> eigenValues,
+ boolean isSymmetric) {
log.info("Finding {} singular vectors of matrix with {} rows, via Lanczos", desiredRank, corpus.numRows());
Vector currentVector = getInitialVector(corpus);
Vector previousVector = new DenseVector(currentVector.size());
@@ -106,7 +114,7 @@
DoubleMatrix2D triDiag = new DenseDoubleMatrix2D(desiredRank, desiredRank);
for (int i = 1; i < desiredRank; i++) {
startTime(TimingSection.ITERATE);
- Vector nextVector = corpus.timesSquared(currentVector);
+ Vector nextVector = isSymmetric ? corpus.times(currentVector) : corpus.timesSquared(currentVector);
log.info("{} passes through the corpus so far...", i);
calculateScaleFactor(nextVector);
nextVector.assign(new Scale(1 / scaleFactor));
Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java?rev=919701&r1=919700&r2=919701&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java Sat Mar 6 04:59:57 2010
@@ -23,12 +23,16 @@
import org.apache.mahout.math.SparseRowMatrix;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorIterable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.Random;
public abstract class SolverTest extends TestCase {
+ private static final Logger log = LoggerFactory.getLogger(SolverTest.class);
+
protected SolverTest(String name) {
super(name);
}
@@ -55,18 +59,24 @@
}
}
- public static void assertEigen(Matrix eigens, VectorIterable corpus, double errorMargin) {
- assertEigen(eigens, corpus, eigens.numRows(), errorMargin);
+ public static void assertEigen(Matrix eigens, VectorIterable corpus, double errorMargin, boolean isSymmetric) {
+ assertEigen(eigens, corpus, eigens.numRows(), errorMargin, isSymmetric);
}
- public static void assertEigen(Matrix eigens, VectorIterable corpus, int numEigensToCheck, double errorMargin) {
+ public static void assertEigen(Matrix eigens,
+ VectorIterable corpus,
+ int numEigensToCheck,
+ double errorMargin,
+ boolean isSymmetric) {
for (int i = 0; i < numEigensToCheck; i++) {
Vector e = eigens.getRow(i);
- if (e.norm(2) == 0) continue;
- Vector afterMultiply = corpus.timesSquared(e);
+ if (e.getLengthSquared() == 0) continue;
+ Vector afterMultiply = isSymmetric ? corpus.times(e) : corpus.timesSquared(e);
double dot = afterMultiply.dot(e);
- double error = 1 - dot / (afterMultiply.norm(2) * e.norm(2));
- assertTrue("Error margin: " + error + " too high! (for eigen " + i + ')', Math.abs(error) < errorMargin);
+ double afterNorm = afterMultiply.getLengthSquared();
+ double error = 1 - dot / Math.sqrt(afterNorm * e.getLengthSquared());
+ log.info("Eigenvalue({}) = {}", i, Math.sqrt(afterNorm/e.getLengthSquared()));
+ assertTrue("Error margin: {" + error + " too high! (for eigen " + i + ")", Math.abs(error) < errorMargin);
}
}
@@ -81,10 +91,10 @@
* @return
*/
public static Matrix randomSequentialAccessSparseMatrix(int numRows,
- int nonNullRows,
- int numCols,
- int entriesPerRow,
- double entryMean) {
+ int nonNullRows,
+ int numCols,
+ int entriesPerRow,
+ double entryMean) {
SparseRowMatrix m = new SparseRowMatrix(new int[]{numRows, numCols});
double n = 0;
Random r = new Random(1234L);
@@ -96,8 +106,8 @@
v.set(col, val * entryMean);
}
int c = r.nextInt(numRows);
- if (r.nextBoolean()) {
- m.assignRow(c, v);
+ if (r.nextBoolean() || numRows == nonNullRows) {
+ m.assignRow(numRows == nonNullRows ? i : c, v);
} else {
Vector other = m.getRow(r.nextInt(numRows));
if (other != null && other.getLengthSquared() > 0) {
Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java?rev=919701&r1=919700&r2=919701&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java Sat Mar 6 04:59:57 2010
@@ -91,7 +91,7 @@
rank,
state);
eigens = state.getCurrentEigens();
- assertEigen(eigens, corpus, 0.05);
+ assertEigen(eigens, corpus, 0.05, false);
assertOrthonormal(eigens, 1.0e-6);
System.out.println("Avg solving (Hebbian) time in ms: " + optimizedTime);
}
Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java?rev=919701&r1=919700&r2=919701&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java Sat Mar 6 04:59:57 2010
@@ -35,18 +35,30 @@
Matrix corpus = randomSequentialAccessSparseMatrix(1000, 900, numColumns, 30, 1.0);
int rank = 50;
Matrix eigens = new DenseMatrix(rank, numColumns);
- long time = timeLanczos(corpus, eigens, rank);
+ long time = timeLanczos(corpus, eigens, rank, false);
assertTrue("Lanczos taking too long! Are you in the debugger? :)", time < 10000);
assertOrthonormal(eigens);
- assertEigen(eigens, corpus, 0.1);
+ assertEigen(eigens, corpus, 0.1, false);
}
- public static long timeLanczos(Matrix corpus, Matrix eigens, int rank) throws Exception {
+ public void testLanczosSolverSymmetric() throws Exception {
+ int numColumns = 400;
+ Matrix corpus = randomSequentialAccessSparseMatrix(500, 450, numColumns, 10, 1.0);
+ Matrix gramMatrix = corpus.times(corpus.transpose());
+ int rank = 30;
+ Matrix eigens = new DenseMatrix(rank, gramMatrix.numCols());
+ long time = timeLanczos(gramMatrix, eigens, rank, true);
+ assertTrue("Lanczos taking too long! Are you in the debugger? :)", time < 10000);
+ assertOrthonormal(eigens);
+ assertEigen(eigens, gramMatrix, 0.1, true);
+ }
+
+ public static long timeLanczos(Matrix corpus, Matrix eigens, int rank, boolean symmetric) throws Exception {
long start = System.currentTimeMillis();
LanczosSolver solver = new LanczosSolver();
List<Double> eVals = new ArrayList<Double>();
- solver.solve(corpus, rank, eigens, eVals);
+ solver.solve(corpus, rank, eigens, eVals, symmetric);
long end = System.currentTimeMillis();
return end - start;