You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/09/25 11:51:44 UTC
svn commit: r1001180 [2/2] - in /mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/
core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/
core/src/main/java/org/apache/mahout/classifier/sgd/
core/src/main/java/org/...
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java Sat Sep 25 09:51:42 2010
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.ep.State;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.RandomAccessSparseVector;
@@ -45,6 +46,7 @@ import java.io.Reader;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;
@@ -97,17 +99,18 @@ import java.util.Set;
* <tr><td colspan=4><hr></td></tr>
* </table>
*/
-public class TrainNewsGroups {
+public final class TrainNewsGroups {
+
private static final int FEATURES = 10000;
// 1997-01-15 00:01:00 GMT
private static final long DATE_REFERENCE = 853286460;
private static final long MONTH = 30 * 24 * 3600;
private static final long WEEK = 7 * 24 * 3600;
- private static final Random rand = new Random();
+ private static final Random rand = RandomUtils.getRandom();
private static final String[] leakLabels = {"none", "month-year", "day-month-year"};
- private static final SimpleDateFormat[] df = new SimpleDateFormat[]{
+ private static final SimpleDateFormat[] df = {
new SimpleDateFormat(""),
new SimpleDateFormat("MMM-yyyy"),
new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss")
@@ -117,6 +120,9 @@ public class TrainNewsGroups {
private static final FeatureVectorEncoder encoder = new StaticWordValueEncoder("body");
private static final FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept");
+ private TrainNewsGroups() {
+ }
+
public static void main(String[] args) throws IOException {
File base = new File(args[0]);
@@ -145,7 +151,7 @@ public class TrainNewsGroups {
int k = 0;
double step = 0;
- int[] bumps = new int[]{1, 2, 5};
+ int[] bumps = {1, 2, 5};
for (File file : files.subList(0, 10000)) {
String ng = file.getParentFile().getName();
int actual = newsGroups.intern(ng);
@@ -242,25 +248,28 @@ public class TrainNewsGroups {
Multiset<String> words = ConcurrentHashMultiset.create();
BufferedReader reader = new BufferedReader(new FileReader(file));
- String line = reader.readLine();
- Reader dateString = new StringReader(df[leakType % 3].format(new Date(date)));
- countWords(analyzer, words, dateString);
- while (line != null && line.length() > 0) {
- boolean countHeader = (
- line.startsWith("From:") || line.startsWith("Subject:") ||
- line.startsWith("Keywords:") || line.startsWith("Summary:")) && (leakType < 6);
- do {
- StringReader in = new StringReader(line);
- if (countHeader) {
- countWords(analyzer, words, in);
- }
- line = reader.readLine();
- } while (line.startsWith(" "));
- }
- if (leakType < 3) {
- countWords(analyzer, words, reader);
+ try {
+ String line = reader.readLine();
+ Reader dateString = new StringReader(df[leakType % 3].format(new Date(date)));
+ countWords(analyzer, words, dateString);
+ while (line != null && line.length() > 0) {
+ boolean countHeader = (
+ line.startsWith("From:") || line.startsWith("Subject:") ||
+ line.startsWith("Keywords:") || line.startsWith("Summary:")) && (leakType < 6);
+ do {
+ Reader in = new StringReader(line);
+ if (countHeader) {
+ countWords(analyzer, words, in);
+ }
+ line = reader.readLine();
+ } while (line.startsWith(" "));
+ }
+ if (leakType < 3) {
+ countWords(analyzer, words, reader);
+ }
+ } finally {
+ reader.close();
}
- reader.close();
Vector v = new RandomAccessSparseVector(FEATURES);
bias.addToVector("", 1, v);
@@ -271,7 +280,7 @@ public class TrainNewsGroups {
return v;
}
- private static void countWords(Analyzer analyzer, Multiset<String> words, Reader in) throws IOException {
+ private static void countWords(Analyzer analyzer, Collection<String> words, Reader in) throws IOException {
TokenStream ts = analyzer.tokenStream("text", in);
ts.addAttribute(TermAttribute.class);
while (ts.incrementToken()) {
@@ -280,7 +289,7 @@ public class TrainNewsGroups {
}
}
- private static List<File> permute(List<File> files, Random rand) {
+ private static List<File> permute(Iterable<File> files, Random rand) {
List<File> r = Lists.newArrayList();
for (File file : files) {
int i = rand.nextInt(r.size() + 1);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java Sat Sep 25 09:51:42 2010
@@ -34,7 +34,7 @@ import org.apache.mahout.common.StringUt
/**
* Separate the input data into a training and testing set.
*/
-public class DatasetSplit {
+public final class DatasetSplit {
private static final String SEED = "traintest.seed";
@@ -119,7 +119,7 @@ public class DatasetSplit {
}
/**
- * a {@link org.apache.hadoop.mapred.LineRecordReader LineRecordReader} that skips some lines from the
+ * a {@link RecordReader} that skips some lines from the
* input. Uses a Random number generator with a specific seed to decide if a line will be skipped or not.
*/
public static class RndLineRecordReader extends RecordReader<LongWritable, Text> {
Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java Sat Sep 25 09:51:42 2010
@@ -22,7 +22,6 @@ import com.google.common.base.Charsets;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
-import com.google.common.io.Files;
import com.google.common.io.Resources;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.examples.MahoutTestCase;
@@ -30,18 +29,20 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.junit.Test;
-import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Set;
public class TrainLogisticTest extends MahoutTestCase {
- Splitter onWhiteSpace = Splitter.on(CharMatcher.BREAKING_WHITESPACE).trimResults().omitEmptyStrings();
+
+ private static final Splitter ON_WHITE_SPACE =
+ Splitter.on(CharMatcher.BREAKING_WHITESPACE).trimResults().omitEmptyStrings();
+
@Test
public void testMain() throws IOException {
String outputFile = "./model";
String inputFile = "donut.csv";
- String[] args = Iterables.toArray(onWhiteSpace.split(
+ String[] args = Iterables.toArray(ON_WHITE_SPACE.split(
"--input " +
inputFile +
" --output " +
@@ -50,9 +51,9 @@ public class TrainLogisticTest extends M
"--predictors x y --types numeric --features 20 --passes 100 --rate 50 "), String.class);
TrainLogistic.main(args);
LogisticModelParameters lmp = TrainLogistic.getParameters();
- assertEquals(1e-4, lmp.getLambda(), 1e-9);
+ assertEquals(1.0e-4, lmp.getLambda(), 1.0e-9);
assertEquals(20, lmp.getNumFeatures());
- assertEquals(true, lmp.useBias());
+ assertTrue(lmp.useBias());
assertEquals("color", lmp.getTargetVariable());
CsvRecordFactory csv = lmp.getCsvRecordFactory();
assertEquals("[1, 2]", Sets.newTreeSet(csv.getTargetCategories()).toString());
Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java Sat Sep 25 09:51:42 2010
@@ -25,6 +25,6 @@ package org.apache.mahout.examples;
public abstract class MahoutTestCase extends org.apache.mahout.common.MahoutTestCase {
/** "Close enough" value for floating-point comparisons. */
- public static final double EPSILON = 0.0000001;
+ public static final double EPSILON = 0.000001;
}
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java Sat Sep 25 09:51:42 2010
@@ -573,15 +573,15 @@ public abstract class AbstractVector imp
}
public double get() {
- return getQuick(index);
- }
+ return getQuick(index);
+ }
- public int index() {
- return index;
- }
+ public int index() {
+ return index;
+ }
- public void set(double value) {
- setQuick(index, value);
- }
+ public void set(double value) {
+ setQuick(index, value);
+ }
}
}
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java Sat Sep 25 09:51:42 2010
@@ -144,7 +144,7 @@ import java.util.Date;
@version 1.0, 09/24/99
@see java.util.Random
*/
-public class MersenneTwister extends RandomEngine {
+public final class MersenneTwister extends RandomEngine {
/* Period parameters */
private static final int N = 624;
@@ -206,7 +206,7 @@ public class MersenneTwister extends Ran
}
/** Generates N words at one time */
- protected void nextBlock() {
+ void nextBlock() {
int y;
int kk;
@@ -248,7 +248,7 @@ public class MersenneTwister extends Ran
/** Sets the receiver's seed. This method resets the receiver's entire internal state.
* @param seed An integer that is used to reset the internal state of the generator */
- protected void setSeed(int seed) {
+ void setSeed(int seed) {
mt[0] = seed;
for (int i = 1; i < N; i++) {
mt[i] = 1812433253 * (mt[i - 1] ^ (mt[i - 1] >> 30)) + i;
@@ -275,7 +275,7 @@ public class MersenneTwister extends Ran
* done in the 1999 reference implementation. Should only be used for testing, not
* actual coding.
*/
- protected void setReferenceSeed(int seed) {
+ void setReferenceSeed(int seed) {
for (int i = 0; i < N; i++) {
mt[i] = seed & 0xffff0000;
seed = 69069 * seed + 1;
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java Sat Sep 25 09:51:42 2010
@@ -14,7 +14,7 @@ import java.util.Random;
/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
@Deprecated
-public class RandomSamplingAssistant extends PersistentObject {
+public final class RandomSamplingAssistant extends PersistentObject {
private static final int MAX_BUFFER_SIZE = 200;
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java Sat Sep 25 09:51:42 2010
@@ -30,7 +30,7 @@ import java.util.Random;
/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
@Deprecated
-public class WeightedRandomSampler extends PersistentObject {
+public final class WeightedRandomSampler extends PersistentObject {
//public class BlockedRandomSampler extends Object implements java.io.Serializable {
private int skip;
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java Sat Sep 25 09:51:42 2010
@@ -672,9 +672,10 @@ public abstract class DoubleMatrix2D ext
* @param columnIndexes The columns of the cells that shall be visible in the new view. To indicate that <i>all</i>
* columns shall be visible, simply set this parameter to <tt>null</tt>.
* @return the new view.
- * @throws IndexOutOfBoundsException if <tt>!(0 <= rowIndexes[i] < rows())</tt> for any <tt>i=0..rowIndexes.length()-1</tt>.
- * @throws IndexOutOfBoundsException if <tt>!(0 <= columnIndexes[i] < columns())</tt> for any
- * <tt>i=0..columnIndexes.length()-1</tt>.
+ * @throws IndexOutOfBoundsException
+ * if <tt>!(0 <= rowIndexes[i] < rows())</tt> for any <tt>i=0..rowIndexes.length()-1</tt>.
+ * @throws IndexOutOfBoundsException
+ * if <tt>!(0 <= columnIndexes[i] < columns())</tt> for any <tt>i=0..columnIndexes.length()-1</tt>.
*/
public DoubleMatrix2D viewSelection(int[] rowIndexes, int[] columnIndexes) {
// check for "all"
@@ -792,13 +793,15 @@ public abstract class DoubleMatrix2D ext
*
* // 8 neighbors org.apache.mahout.math.function.Double9Function f = new Double9Function() {
* public final double apply( double a00, double a01, double
- * a02, double a10, double a11, double a12, double
+ * a02, double a10, double a11, double a12,
+ * double
* a20, double a21, double a22) { return beta*a11 +
* alpha*(a00+a01+a02 + a10+a12 + a20+a21+a22); } }; A.zAssign8Neighbors(B,f);
*
* // 4 neighbors org.apache.mahout.math.function.Double9Function g = new Double9Function() {
* public final double apply( double a00, double a01, double
- * a02, double a10, double a11, double a12, double
+ * a02, double a10, double a11, double a12,
+ * double
* a20, double a21, double a22) { return beta*a11 + alpha*(a01+a10+a12+a21);
* } C.zAssign8Neighbors(B,g); // fast, even though it doesn't look like it }; </pre>
*
@@ -848,9 +851,12 @@ public abstract class DoubleMatrix2D ext
}
}
- /** Linear algebraic matrix-vector multiplication; <tt>z = A * y</tt>; Equivalent to <tt>return A.zMult(y,z,1,0);</tt> */
+ /**
+ * Linear algebraic matrix-vector multiplication; <tt>z = A * y</tt>;
+ * Equivalent to <tt>return A.zMult(y,z,1,0);</tt>
+ */
public DoubleMatrix1D zMult(DoubleMatrix1D y, DoubleMatrix1D z) {
- return zMult(y, z, 1, (z == null ? 1 : 0), false);
+ return zMult(y, z, 1, z == null ? 1 : 0, false);
}
/**
@@ -891,7 +897,7 @@ public abstract class DoubleMatrix2D ext
* <tt>A.zMult(B,C,1,0,false,false)</tt>.
*/
public DoubleMatrix2D zMult(DoubleMatrix2D B, DoubleMatrix2D C) {
- return zMult(B, C, 1, (C == null ? 1 : 0), false, false);
+ return zMult(B, C, 1, C == null ? 1 : 0, false, false);
}
/**
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java Sat Sep 25 09:51:42 2010
@@ -9,8 +9,10 @@ It is provided "as is" without expressed
package org.apache.mahout.math.matrix.impl;
/**
- Abstract base class for 1-d matrices (aka <i>vectors</i>) holding objects or primitive data types such as <code>int</code>, <code>double</code>, etc.
- First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ Abstract base class for 1-d matrices (aka <i>vectors</i>) holding objects or primitive data types such as
+ <code>int</code>, <code>double</code>, etc.
+ First see the <a href="package-summary.html">package summary</a> and javadoc
+ <a href="package-tree.html">tree view</a> to get the broad picture.
<p>
<b>Note that this implementation is not synchronized.</b>
@@ -53,7 +55,7 @@ public abstract class AbstractMatrix1D e
* @param rank the relative rank of the element.
* @return the absolute rank of the element.
*/
- protected int _rank(int rank) {
+ protected int rank(int rank) {
return zero + rank * stride;
//return zero + ((rank+flipMask)^flipMask);
//return zero + rank*flip; // slower
@@ -98,22 +100,22 @@ public abstract class AbstractMatrix1D e
/**
* Sanity check for operations requiring two matrices with the same size.
*
- * @throws IllegalArgumentException if <tt>size() != B.size()</tt>.
+ * @throws IllegalArgumentException if <tt>size() != b.size()</tt>.
*/
- protected void checkSize(double[] B) {
- if (size != B.length) {
- throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + B.length);
+ protected void checkSize(double[] b) {
+ if (size != b.length) {
+ throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + b.length);
}
}
/**
* Sanity check for operations requiring two matrices with the same size.
*
- * @throws IllegalArgumentException if <tt>size() != B.size()</tt>.
+ * @throws IllegalArgumentException if <tt>size() != b.size()</tt>.
*/
- public void checkSize(AbstractMatrix1D B) {
- if (size != B.size) {
- throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + B.size);
+ public void checkSize(AbstractMatrix1D b) {
+ if (size != b.size) {
+ throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + b.size);
}
}
@@ -124,7 +126,7 @@ public abstract class AbstractMatrix1D e
* @param rank the rank of the element.
*/
protected int index(int rank) {
- return offset(_rank(rank));
+ return offset(rank(rank));
}
/**
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java Sat Sep 25 09:51:42 2010
@@ -16,7 +16,7 @@ import org.apache.mahout.math.matrix.Dou
/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
@Deprecated
-public class Algebra extends PersistentObject {
+public final class Algebra extends PersistentObject {
/**
* A default Algebra object; has {@link Property#DEFAULT} attached for tolerance. Allows ommiting to construct an
@@ -85,7 +85,7 @@ public class Algebra extends PersistentO
}
/** Returns sqrt(a^2 + b^2) without under/overflow. */
- protected static double hypot(double a, double b) {
+ static double hypot(double a, double b) {
double r;
if (Math.abs(a) > Math.abs(b)) {
r = b / a;
@@ -280,7 +280,7 @@ public class Algebra extends PersistentO
* @return X; a new independent matrix; solution if A is square, least squares solution otherwise.
*/
public static DoubleMatrix2D solve(DoubleMatrix2D A, DoubleMatrix2D B) {
- return (A.rows() == A.columns() ? (lu(A).solve(B)) : (qr(A).solve(B)));
+ return A.rows() == A.columns() ? (lu(A).solve(B)) : (qr(A).solve(B));
}
/**
@@ -288,7 +288,7 @@ public class Algebra extends PersistentO
*
* @return <tt>A</tt> (for convenience only).
*/
- protected static DoubleMatrix2D trapezoidalLower(DoubleMatrix2D A) {
+ static DoubleMatrix2D trapezoidalLower(DoubleMatrix2D A) {
int rows = A.rows();
int columns = A.columns();
for (int r = rows; --r >= 0;) {
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java Sat Sep 25 09:51:42 2010
@@ -14,9 +14,11 @@ import org.apache.mahout.math.list.IntAr
import org.apache.mahout.math.matrix.DoubleMatrix1D;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
+import java.io.Serializable;
+
/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
@Deprecated
-public class LUDecompositionQuick implements java.io.Serializable {
+public class LUDecompositionQuick implements Serializable {
/** Array for internal storage of decomposition. */
private DoubleMatrix2D lu;
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java Sat Sep 25 09:51:42 2010
@@ -32,7 +32,7 @@ import org.junit.Before;
public abstract class MahoutTestCase extends Assert {
/** "Close enough" value for floating-point comparisons. */
- public static final double EPSILON = 0.0000001;
+ public static final double EPSILON = 0.000001;
private File testTempDir;
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java Sat Sep 25 09:51:42 2010
@@ -36,7 +36,7 @@ public final class TestSingularValueDeco
{ -360.0 / 625.0, 192.0 / 625.0, 1756.0 / 625.0 },
};
- private static final double normTolerance = 10.0e-14;
+ private static final double NORM_TOLERANCE = 10.0e-14;
@Test
public void testMoreRows() {
@@ -158,7 +158,7 @@ public final class TestSingularValueDeco
}
double norm = Algebra.getNorm(u.times(s).times(v.transpose()).minus(matrix));
- assertEquals(0, norm, normTolerance);
+ assertEquals(0, norm, NORM_TOLERANCE);
}
@@ -184,7 +184,7 @@ public final class TestSingularValueDeco
for (int i = 0; i < mTm.numRows(); i++) {
id.set(i, i, 1);
}
- assertEquals(0, Algebra.getNorm(mTm.minus(id)), normTolerance);
+ assertEquals(0, Algebra.getNorm(mTm.minus(id)), NORM_TOLERANCE);
}
/** test matrices values */
@@ -208,11 +208,11 @@ public final class TestSingularValueDeco
// check values against known references
Matrix u = svd.getU();
- assertEquals(0, Algebra.getNorm(u.minus(uRef)), normTolerance);
+ assertEquals(0, Algebra.getNorm(u.minus(uRef)), NORM_TOLERANCE);
Matrix s = svd.getS();
- assertEquals(0, Algebra.getNorm(s.minus(sRef)), normTolerance);
+ assertEquals(0, Algebra.getNorm(s.minus(sRef)), NORM_TOLERANCE);
Matrix v = svd.getV();
- assertEquals(0, Algebra.getNorm(v.minus(vRef)), normTolerance);
+ assertEquals(0, Algebra.getNorm(v.minus(vRef)), NORM_TOLERANCE);
}
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java Sat Sep 25 09:51:42 2010
@@ -143,7 +143,7 @@ public final class TestHebbianSolver ext
_eigensVectorFactory = new DenseMapVectorFactory();
int desiredRank = 200;
long time = timeSolver(TMP_EIGEN_DIR,
- 0.00001,
+ 0.00001,
5,
desiredRank,
new TrainingState());
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Sat Sep 25 09:51:42 2010
@@ -90,37 +90,37 @@ public final class SequenceFileDumper {
} else {
writer = new OutputStreamWriter(System.out);
}
- writer.append("Input Path: ").append(String.valueOf(path)).append('\n');
-
- int sub = Integer.MAX_VALUE;
- if (cmdLine.hasOption(substringOpt)) {
- sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
- }
- boolean countOnly = cmdLine.hasOption(countOpt);
- Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
- Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
- writer.append("Key class: ").append(String.valueOf(reader.getKeyClass()));
- writer.append(" Value Class: ").append(String.valueOf(value.getClass())).append('\n');
- writer.flush();
- long count = 0;
- if (countOnly) {
- while (reader.next(key, value)) {
- count++;
+ try {
+ writer.append("Input Path: ").append(String.valueOf(path)).append('\n');
+
+ int sub = Integer.MAX_VALUE;
+ if (cmdLine.hasOption(substringOpt)) {
+ sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
}
- writer.append("Count: ").append(String.valueOf(count)).append('\n');
- } else {
- while (reader.next(key, value)) {
- writer.append("Key: ").append(String.valueOf(key));
- String str = value.toString();
- writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
- writer.write('\n');
- writer.flush();
- count++;
+ boolean countOnly = cmdLine.hasOption(countOpt);
+ Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+ Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
+ writer.append("Key class: ").append(String.valueOf(reader.getKeyClass()));
+ writer.append(" Value Class: ").append(String.valueOf(value.getClass())).append('\n');
+ writer.flush();
+ long count = 0;
+ if (countOnly) {
+ while (reader.next(key, value)) {
+ count++;
+ }
+ writer.append("Count: ").append(String.valueOf(count)).append('\n');
+ } else {
+ while (reader.next(key, value)) {
+ writer.append("Key: ").append(String.valueOf(key));
+ String str = value.toString();
+ writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
+ writer.write('\n');
+ writer.flush();
+ count++;
+ }
+ writer.append("Count: ").append(String.valueOf(count)).append('\n');
}
- writer.append("Count: ").append(String.valueOf(count)).append('\n');
- }
- writer.flush();
- if (cmdLine.hasOption(outputOpt)) {
+ } finally {
writer.close();
}
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Sat Sep 25 09:51:42 2010
@@ -73,13 +73,13 @@ public final class ClusterDumper extends
private Path seqFileDir;
- private Path pointsDir = null;
+ private Path pointsDir;
- private String termDictionary = null;
+ private String termDictionary;
- private String dictionaryFormat = null;
+ private String dictionaryFormat;
- private String outputFile = null;
+ private String outputFile;
private int subString = Integer.MAX_VALUE;
@@ -109,9 +109,11 @@ public final class ClusterDumper extends
addOption(OUTPUT_OPTION, "o", "Optional output directory. Default is to output to the console.");
addOption(SUBSTRING_OPTION, "b", "The number of chars of the asFormatString() to print");
addOption(NUM_WORDS_OPTION, "n", "The number of top terms to print");
- addOption(JSON_OPTION, "j", "Output the centroid as JSON. Otherwise it substitues in the terms for vector cell entries");
- addOption(POINTS_DIR_OPTION, "p", "The directory containing points sequence files mapping input vectors to their cluster. "
- + "If specified, then the program will output the points associated with a cluster");
+ addOption(JSON_OPTION, "j",
+ "Output the centroid as JSON. Otherwise it substitues in the terms for vector cell entries");
+ addOption(POINTS_DIR_OPTION, "p",
+ "The directory containing points sequence files mapping input vectors to their cluster. "
+ + "If specified, then the program will output the points associated with a cluster");
addOption(DICTIONARY_OPTION, "d", "The dictionary file");
addOption(DICTIONARY_TYPE_OPTION, "dt", "The dictionary file type (text|sequencefile)", "text");
if (parseArguments(args) == null) {
@@ -256,11 +258,8 @@ public final class ClusterDumper extends
return this.numTopFeatures;
}
- private void setUseJSON(boolean json) {
- this.useJSON = json;
- }
-
- private static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir, Configuration conf) throws IOException {
+ private static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir,
+ Configuration conf) throws IOException {
Map<Integer, List<WeightedVectorWritable>> result = new TreeMap<Integer, List<WeightedVectorWritable>>();
FileSystem fs = pointsPathDir.getFileSystem(conf);
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java Sat Sep 25 09:51:42 2010
@@ -29,8 +29,7 @@ import org.apache.mahout.math.Varint;
import org.apache.mahout.utils.nlp.collocations.llr.Gram.Type;
/** A GramKey, based on the identity fields of Gram (type, string) plus a byte[] used for secondary ordering */
-public class GramKey extends BinaryComparable implements
- WritableComparable<BinaryComparable> {
+public final class GramKey extends BinaryComparable implements WritableComparable<BinaryComparable> {
private int primaryLength;
private int length;
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java Sat Sep 25 09:51:42 2010
@@ -18,7 +18,6 @@
package org.apache.mahout.utils.vectors.common;
import java.io.IOException;
-import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -71,13 +70,10 @@ public final class PartialVectorMerger {
* @param dimension
* @param sequentialAccess
* output vectors should be optimized for sequential access
- * @param namedVectors
+ * @param namedVector
* output vectors should be named, retaining key (doc id) as a label
* @param numReducers
* The number of reducers to spawn
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
public static void mergePartialVectors(Iterable<Path> partialVectorPaths,
Path output,
@@ -122,7 +118,7 @@ public final class PartialVectorMerger {
}
private static String getCommaSeparatedPaths(Iterable<Path> paths) {
- StringBuilder commaSeparatedPaths = new StringBuilder();
+ StringBuilder commaSeparatedPaths = new StringBuilder(100);
String sep = "";
for (Path path : paths) {
commaSeparatedPaths.append(sep).append(path.toString());
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Sat Sep 25 09:51:42 2010
@@ -29,7 +29,6 @@ import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
@@ -180,8 +179,8 @@ public class ClusterLabels {
/**
* Get the list of labels, sorted by best score.
*/
- protected List<TermInfoClusterInOut> getClusterLabels(Integer integer, Collection<WeightedVectorWritable> wvws)
- throws IOException {
+ protected List<TermInfoClusterInOut> getClusterLabels(Integer integer,
+ Collection<WeightedVectorWritable> wvws) throws IOException {
if (wvws.size() < minNumIds) {
log.info("Skipping small cluster {} with size: {}", integer, wvws.size());
@@ -267,8 +266,9 @@ public class ClusterLabels {
return clusteredTermInfo.subList(0, Math.min(clusteredTermInfo.size(), maxLabels));
}
- private static OpenBitSet getClusterDocBitset(IndexReader reader, Collection<String> idSet, String idField)
- throws IOException {
+ private static OpenBitSet getClusterDocBitset(IndexReader reader,
+ Collection<String> idSet,
+ String idField) throws IOException {
int numDocs = reader.numDocs();
OpenBitSet bitset = new OpenBitSet(numDocs);
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java Sat Sep 25 09:51:42 2010
@@ -141,8 +141,8 @@ public final class Driver {
if (cmdLine.hasOption(inputOpt)) { // Lucene case
File file = new File(cmdLine.getValue(inputOpt).toString());
if (!file.isDirectory()) {
- throw new IllegalArgumentException("Lucene directory: " + file.getAbsolutePath() +
- " does not exist or is not a directory");
+ throw new IllegalArgumentException("Lucene directory: " + file.getAbsolutePath()
+ + " does not exist or is not a directory");
}
long maxDocs = Long.MAX_VALUE;
Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java Sat Sep 25 09:51:42 2010
@@ -25,6 +25,6 @@ package org.apache.mahout.utils;
public abstract class MahoutTestCase extends org.apache.mahout.common.MahoutTestCase {
/** "Close enough" value for floating-point comparisons. */
- public static final double EPSILON = 0.0000001;
+ public static final double EPSILON = 0.000001;
}