You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/04/05 07:51:48 UTC
svn commit: r930799 - in /lucene/mahout/trunk/math: pom.xml
src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
Author: srowen
Date: Mon Apr 5 05:51:48 2010
New Revision: 930799
URL: http://svn.apache.org/viewvc?rev=930799&view=rev
Log:
Restore logging to SVD-related code
Modified:
lucene/mahout/trunk/math/pom.xml
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
Modified: lucene/mahout/trunk/math/pom.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/pom.xml?rev=930799&r1=930798&r2=930799&view=diff
==============================================================================
--- lucene/mahout/trunk/math/pom.xml (original)
+++ lucene/mahout/trunk/math/pom.xml Mon Apr 5 05:51:48 2010
@@ -100,6 +100,19 @@
</dependency>
<dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>1.5.8</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jcl</artifactId>
+ <version>1.5.8</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java?rev=930799&r1=930798&r2=930799&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java Mon Apr 5 05:51:48 2010
@@ -23,6 +23,7 @@ import java.util.Random;
import java.util.ArrayList;
+import org.apache.mahout.math.AbstractMatrix;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
@@ -32,6 +33,8 @@ import org.apache.mahout.math.decomposer
import org.apache.mahout.math.function.TimesFunction;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.PlusMult;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* The Hebbian solver is an iterative, sparse, singular value decomposition solver, based on the paper
@@ -41,13 +44,16 @@ import org.apache.mahout.math.function.P
*/
public class HebbianSolver {
+ private static final Logger log = LoggerFactory.getLogger(HebbianSolver.class);
+
private final EigenUpdater updater;
private final SingularVectorVerifier verifier;
private final double convergenceTarget;
private final int maxPassesPerEigen;
private final Random rng = new Random();
- //private int numPasses = 0;
+ private int numPasses = 0;
+ private static final boolean debug = false;
/**
* Creates a new HebbianSolver
@@ -157,6 +163,7 @@ public class HebbianSolver {
int cols = corpus.numCols();
Matrix eigens = new DenseMatrix(desiredRank, cols);
List<Double> eigenValues = new ArrayList<Double>();
+ log.info("Finding " + desiredRank + " singular vectors of matrix with " + corpus.numRows() + " rows, via Hebbian");
/**
* The corpusProjections matrix is a running cache of the residual projection of each corpus vector against all
* of the previously found singular vectors. Without this, if multiple passes over the data are made (per
@@ -179,6 +186,17 @@ public class HebbianSolver {
updater.update(currentEigen, corpus.getRow(corpusRow), state);
}
state.setFirstPass(false);
+ if (debug) {
+ if (previousEigen == null) {
+ previousEigen = currentEigen.clone();
+ } else {
+ double dot = currentEigen.dot(previousEigen);
+ if (dot > 0) {
+ dot /= (currentEigen.norm(2) * previousEigen.norm(2));
+ }
+ // log.info("Current pass * previous pass = {}", dot);
+ }
+ }
}
// converged!
double eigenValue = state.getStatusProgress().get(state.getStatusProgress().size() - 1).getEigenValue();
@@ -188,6 +206,7 @@ public class HebbianSolver {
eigens.assignRow(i, currentEigen);
eigenValues.add(eigenValue);
state.setCurrentEigenValues(eigenValues);
+ log.info("Found eigenvector {}, eigenvalue: {}", i, eigenValue);
/**
* TODO: Persist intermediate output!
@@ -197,7 +216,7 @@ public class HebbianSolver {
state.setActivationDenominatorSquared(0);
state.setActivationNumerator(0);
state.getStatusProgress().clear();
- //numPasses = 0;
+ numPasses = 0;
}
return state;
}
@@ -234,11 +253,13 @@ public class HebbianSolver {
protected boolean hasNotConverged(Vector currentPseudoEigen,
Matrix corpus,
TrainingState state) {
- //numPasses++;
+ numPasses++;
if (state.isFirstPass()) {
+ log.info("First pass through the corpus, no need to check convergence...");
return true;
}
Matrix previousEigens = state.getCurrentEigens();
+ log.info("Have made {} passes through the corpus, checking convergence...", numPasses);
/*
* Step 1: orthogonalize currentPseudoEigen by subtracting off eigen(i) * helper.get(i)
* Step 2: zero-out the helper vector because it has already helped.
@@ -248,11 +269,20 @@ public class HebbianSolver {
currentPseudoEigen.assign(previousEigen, new PlusMult(-state.getHelperVector().get(i)));
state.getHelperVector().set(i, 0);
}
+ if (debug && currentPseudoEigen.norm(2) > 0) {
+ for (int i = 0; i < state.getNumEigensProcessed(); i++) {
+ Vector previousEigen = previousEigens.getRow(i);
+ log.info("dot with previous: {}", (previousEigen.dot(currentPseudoEigen)) / currentPseudoEigen.norm(2));
+ }
+ }
/*
* Step 3: verify how eigen-like the prospective eigen is. This is potentially asynchronous.
*/
EigenStatus status = verify(corpus, currentPseudoEigen);
- if (!status.inProgress()) {
+ if (status.inProgress()) {
+ log.info("Verifier not finished, making another pass...");
+ } else {
+ log.info("Has 1 - cosAngle: {}, convergence target is: {}", (1 - status.getCosAngle()), convergenceTarget);
state.getStatusProgress().add(status);
}
return (state.getStatusProgress().size() <= maxPassesPerEigen && 1 - status.getCosAngle() > convergenceTarget);
@@ -270,6 +300,7 @@ public class HebbianSolver {
String corpusDir = props.getProperty("solver.input.dir");
String outputDir = props.getProperty("solver.output.dir");
if (corpusDir == null || corpusDir.length() == 0 || outputDir == null || outputDir.length() == 0) {
+ log.error("{} must contain values for solver.input.dir and solver.output.dir", propertiesFile);
return;
}
int inBufferSize = Integer.parseInt(props.getProperty("solver.input.bufferSize"));
@@ -290,7 +321,11 @@ public class HebbianSolver {
} else {
// corpus = new ParallelMultiplyingDiskBufferedDoubleMatrix(new File(corpusDir), inBufferSize, numThreads);
}
+ long now = System.currentTimeMillis();
TrainingState finalState = solver.solve(corpus, rank);
+ long time = (System.currentTimeMillis() - now) / 1000;
+ log.info("Solved {} eigenVectors in {} seconds. Persisted to {}",
+ new Object[] {finalState.getCurrentEigens().size()[AbstractMatrix.ROW], time, outputDir});
}
}
Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java?rev=930799&r1=930798&r2=930799&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java Mon Apr 5 05:51:48 2010
@@ -35,6 +35,8 @@ import org.apache.mahout.math.matrix.Dou
import org.apache.mahout.math.matrix.DoubleMatrix2D;
import org.apache.mahout.math.matrix.impl.DenseDoubleMatrix2D;
import org.apache.mahout.math.matrix.linalg.EigenvalueDecomposition;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* <p>Simple implementation of the <a href="http://en.wikipedia.org/wiki/Lanczos_algorithm">Lanczos algorithm</a> for
@@ -63,6 +65,8 @@ import org.apache.mahout.math.matrix.lin
*/
public class LanczosSolver {
+ private static final Logger log = LoggerFactory.getLogger(LanczosSolver.class);
+
public static final double SAFE_MAX = 1.0e150;
private static final double NANOS_IN_MILLI = 1.0e6;
@@ -73,7 +77,7 @@ public class LanczosSolver {
private final Map<TimingSection, Long> startTimes = new EnumMap<TimingSection, Long>(TimingSection.class);
private final Map<TimingSection, Long> times = new EnumMap<TimingSection, Long>(TimingSection.class);
- protected double scaleFactor = 0.0;
+ protected double scaleFactor = 0;
private static final class Scale implements UnaryFunction {
private final double d;
@@ -99,6 +103,7 @@ public class LanczosSolver {
Matrix eigenVectors,
List<Double> eigenValues,
boolean isSymmetric) {
+ log.info("Finding {} singular vectors of matrix with {} rows, via Lanczos", desiredRank, corpus.numRows());
Vector currentVector = getInitialVector(corpus);
Vector previousVector = new DenseVector(currentVector.size());
Matrix basis = new SparseRowMatrix(new int[]{desiredRank, corpus.numCols()});
@@ -109,6 +114,7 @@ public class LanczosSolver {
for (int i = 1; i < desiredRank; i++) {
startTime(TimingSection.ITERATE);
Vector nextVector = isSymmetric ? corpus.times(currentVector) : corpus.timesSquared(currentVector);
+ log.info("{} passes through the corpus so far...", i);
calculateScaleFactor(nextVector);
nextVector.assign(new Scale(1 / scaleFactor));
nextVector.assign(previousVector, new PlusMult(-beta));
@@ -122,6 +128,7 @@ public class LanczosSolver {
// and normalize
beta = nextVector.norm(2);
if (outOfRange(beta) || outOfRange(alpha)) {
+ log.warn("Lanczos parameters out of range: alpha = {}, beta = {}. Bailing out early!", alpha, beta);
break;
}
final double b = beta;
@@ -138,6 +145,7 @@ public class LanczosSolver {
}
startTime(TimingSection.TRIDIAG_DECOMP);
+ log.info("Lanczos iteration complete - now to diagonalize the tri-diagonal auxiliary matrix.");
// at this point, have tridiag all filled out, and basis is all filled out, and orthonormalized
EigenvalueDecomposition decomp = new EigenvalueDecomposition(triDiag);
@@ -156,8 +164,10 @@ public class LanczosSolver {
}
realEigen = realEigen.normalize();
eigenVectors.assignRow(i, realEigen);
+ log.info("Eigenvector {} found with eigenvalue {}", i, eigenVals.get(i));
eigenValues.add(eigenVals.get(i));
}
+ log.info("LanczosSolver finished.");
endTime(TimingSection.FINAL_EIGEN_CREATE);
}