You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by jm...@apache.org on 2010/03/06 05:59:58 UTC

svn commit: r919701 - in /lucene/mahout/trunk/math/src: main/java/org/apache/mahout/math/decomposer/lanczos/ test/java/org/apache/mahout/math/decomposer/ test/java/org/apache/mahout/math/decomposer/hebbian/ test/java/org/apache/mahout/math/decomposer/l...

Author: jmannix
Date: Sat Mar  6 04:59:57 2010
New Revision: 919701

URL: http://svn.apache.org/viewvc?rev=919701&view=rev
Log:
Extend LanczosSolver to take symmetric matrices as well as rectangular ones (with tests included).  Partially fixes MAHOUT-310 (for the non-hadoop case).

Modified:
    lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
    lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
    lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java
    lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java

Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java?rev=919701&r1=919700&r2=919701&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java Sat Mar  6 04:59:57 2010
@@ -96,6 +96,14 @@
                     int desiredRank,
                     Matrix eigenVectors,
                     List<Double> eigenValues) {
+    solve(corpus, desiredRank, eigenVectors, eigenValues, false);
+  }
+
+  public void solve(VectorIterable corpus,
+                    int desiredRank,
+                    Matrix eigenVectors,
+                    List<Double> eigenValues,
+                    boolean isSymmetric) {
     log.info("Finding {} singular vectors of matrix with {} rows, via Lanczos", desiredRank, corpus.numRows());
     Vector currentVector = getInitialVector(corpus);
     Vector previousVector = new DenseVector(currentVector.size());
@@ -106,7 +114,7 @@
     DoubleMatrix2D triDiag = new DenseDoubleMatrix2D(desiredRank, desiredRank);
     for (int i = 1; i < desiredRank; i++) {
       startTime(TimingSection.ITERATE);
-      Vector nextVector = corpus.timesSquared(currentVector);
+      Vector nextVector = isSymmetric ? corpus.times(currentVector) : corpus.timesSquared(currentVector);
       log.info("{} passes through the corpus so far...", i);
       calculateScaleFactor(nextVector);
       nextVector.assign(new Scale(1 / scaleFactor));

Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java?rev=919701&r1=919700&r2=919701&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java Sat Mar  6 04:59:57 2010
@@ -23,12 +23,16 @@
 import org.apache.mahout.math.SparseRowMatrix;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorIterable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.Random;
 
 
 public abstract class SolverTest extends TestCase {
 
+  private static final Logger log = LoggerFactory.getLogger(SolverTest.class);
+
   protected SolverTest(String name) {
     super(name);
   }
@@ -55,18 +59,24 @@
     }
   }
 
-  public static void assertEigen(Matrix eigens, VectorIterable corpus, double errorMargin) {
-    assertEigen(eigens, corpus, eigens.numRows(), errorMargin);  
+  public static void assertEigen(Matrix eigens, VectorIterable corpus, double errorMargin, boolean isSymmetric) {
+    assertEigen(eigens, corpus, eigens.numRows(), errorMargin, isSymmetric);
   }
 
-  public static void assertEigen(Matrix eigens, VectorIterable corpus,  int numEigensToCheck, double errorMargin) {
+  public static void assertEigen(Matrix eigens,
+                                 VectorIterable corpus,
+                                 int numEigensToCheck,
+                                 double errorMargin,
+                                 boolean isSymmetric) {
     for (int i = 0; i < numEigensToCheck; i++) {
       Vector e = eigens.getRow(i);
-      if (e.norm(2) == 0) continue;
-      Vector afterMultiply = corpus.timesSquared(e);
+      if (e.getLengthSquared() == 0) continue;
+      Vector afterMultiply = isSymmetric ? corpus.times(e) : corpus.timesSquared(e);
       double dot = afterMultiply.dot(e);
-      double error = 1 - dot / (afterMultiply.norm(2) * e.norm(2));
-      assertTrue("Error margin: " + error + " too high! (for eigen " + i + ')', Math.abs(error) < errorMargin);
+      double afterNorm = afterMultiply.getLengthSquared();
+      double error = 1 - dot / Math.sqrt(afterNorm * e.getLengthSquared());
+      log.info("Eigenvalue({}) = {}", i, Math.sqrt(afterNorm/e.getLengthSquared()));
+      assertTrue("Error margin: {" + error + " too high! (for eigen " + i + ")", Math.abs(error) < errorMargin);
     }
   }
 
@@ -81,10 +91,10 @@
    * @return
    */
   public static Matrix randomSequentialAccessSparseMatrix(int numRows,
-                                                           int nonNullRows,
-                                                           int numCols,
-                                                           int entriesPerRow,
-                                                           double entryMean) {
+                                                          int nonNullRows,
+                                                          int numCols,
+                                                          int entriesPerRow,
+                                                          double entryMean) {
     SparseRowMatrix m = new SparseRowMatrix(new int[]{numRows, numCols});
     double n = 0;
     Random r = new Random(1234L);
@@ -96,8 +106,8 @@
         v.set(col, val * entryMean);
       }
       int c = r.nextInt(numRows);
-      if (r.nextBoolean()) {
-        m.assignRow(c, v);
+      if (r.nextBoolean() || numRows == nonNullRows) {
+        m.assignRow(numRows == nonNullRows ? i : c, v);
       } else {
         Vector other = m.getRow(r.nextInt(numRows));
         if (other != null && other.getLengthSquared() > 0) {

Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java?rev=919701&r1=919700&r2=919701&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java Sat Mar  6 04:59:57 2010
@@ -91,7 +91,7 @@
                                     rank,
                                     state);
     eigens = state.getCurrentEigens();
-    assertEigen(eigens, corpus, 0.05);
+    assertEigen(eigens, corpus, 0.05, false);
     assertOrthonormal(eigens, 1.0e-6);
     System.out.println("Avg solving (Hebbian) time in ms: " + optimizedTime);
   }

Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java?rev=919701&r1=919700&r2=919701&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java (original)
+++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java Sat Mar  6 04:59:57 2010
@@ -35,18 +35,30 @@
     Matrix corpus = randomSequentialAccessSparseMatrix(1000, 900, numColumns, 30, 1.0);
     int rank = 50;
     Matrix eigens = new DenseMatrix(rank, numColumns);
-    long time = timeLanczos(corpus, eigens, rank);
+    long time = timeLanczos(corpus, eigens, rank, false);
     assertTrue("Lanczos taking too long!  Are you in the debugger? :)", time < 10000);
     assertOrthonormal(eigens);
-    assertEigen(eigens, corpus, 0.1);
+    assertEigen(eigens, corpus, 0.1, false);
   }
 
-  public static long timeLanczos(Matrix corpus, Matrix eigens, int rank) throws Exception {
+  public void testLanczosSolverSymmetric() throws Exception {
+    int numColumns = 400;
+    Matrix corpus = randomSequentialAccessSparseMatrix(500, 450, numColumns, 10, 1.0);
+    Matrix gramMatrix = corpus.times(corpus.transpose());
+    int rank = 30;
+    Matrix eigens = new DenseMatrix(rank, gramMatrix.numCols());
+    long time = timeLanczos(gramMatrix, eigens, rank, true);
+    assertTrue("Lanczos taking too long!  Are you in the debugger? :)", time < 10000);
+    assertOrthonormal(eigens);
+    assertEigen(eigens, gramMatrix, 0.1, true);
+  }
+
+  public static long timeLanczos(Matrix corpus, Matrix eigens, int rank, boolean symmetric) throws Exception {
     long start = System.currentTimeMillis();
 
     LanczosSolver solver = new LanczosSolver();
     List<Double> eVals = new ArrayList<Double>();
-    solver.solve(corpus, rank, eigens, eVals);
+    solver.solve(corpus, rank, eigens, eVals, symmetric);
     
     long end = System.currentTimeMillis();
     return end - start;