You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/09/25 11:51:44 UTC

svn commit: r1001180 [2/2] - in /mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ core/src/main/java/org/apache/mahout/classifier/sgd/ core/src/main/java/org/...

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java Sat Sep 25 09:51:42 2010
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.Version;
+import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.ep.State;
 import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.RandomAccessSparseVector;
@@ -45,6 +46,7 @@ import java.io.Reader;
 import java.io.StringReader;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Date;
 import java.util.List;
@@ -97,17 +99,18 @@ import java.util.Set;
  * <tr><td colspan=4><hr></td></tr>
  * </table>
  */
-public class TrainNewsGroups {
+public final class TrainNewsGroups {
+
   private static final int FEATURES = 10000;
   // 1997-01-15 00:01:00 GMT
   private static final long DATE_REFERENCE = 853286460;
   private static final long MONTH = 30 * 24 * 3600;
   private static final long WEEK = 7 * 24 * 3600;
 
-  private static final Random rand = new Random();
+  private static final Random rand = RandomUtils.getRandom();
 
   private static final String[] leakLabels = {"none", "month-year", "day-month-year"};
-  private static final SimpleDateFormat[] df = new SimpleDateFormat[]{
+  private static final SimpleDateFormat[] df = {
     new SimpleDateFormat(""),
     new SimpleDateFormat("MMM-yyyy"),
     new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss")
@@ -117,6 +120,9 @@ public class TrainNewsGroups {
   private static final FeatureVectorEncoder encoder = new StaticWordValueEncoder("body");
   private static final FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept");
 
+  private TrainNewsGroups() {
+  }
+
   public static void main(String[] args) throws IOException {
     File base = new File(args[0]);
 
@@ -145,7 +151,7 @@ public class TrainNewsGroups {
 
     int k = 0;
     double step = 0;
-    int[] bumps = new int[]{1, 2, 5};
+    int[] bumps = {1, 2, 5};
     for (File file : files.subList(0, 10000)) {
       String ng = file.getParentFile().getName();
       int actual = newsGroups.intern(ng);
@@ -242,25 +248,28 @@ public class TrainNewsGroups {
     Multiset<String> words = ConcurrentHashMultiset.create();
 
     BufferedReader reader = new BufferedReader(new FileReader(file));
-    String line = reader.readLine();
-    Reader dateString = new StringReader(df[leakType % 3].format(new Date(date)));
-    countWords(analyzer, words, dateString);
-    while (line != null && line.length() > 0) {
-      boolean countHeader = (
-        line.startsWith("From:") || line.startsWith("Subject:") ||
-          line.startsWith("Keywords:") || line.startsWith("Summary:")) && (leakType < 6);
-      do {
-        StringReader in = new StringReader(line);
-        if (countHeader) {
-          countWords(analyzer, words, in);
-        }
-        line = reader.readLine();
-      } while (line.startsWith(" "));
-    }
-    if (leakType < 3) {
-      countWords(analyzer, words, reader);
+    try {
+      String line = reader.readLine();
+      Reader dateString = new StringReader(df[leakType % 3].format(new Date(date)));
+      countWords(analyzer, words, dateString);
+      while (line != null && line.length() > 0) {
+        boolean countHeader = (
+          line.startsWith("From:") || line.startsWith("Subject:") ||
+            line.startsWith("Keywords:") || line.startsWith("Summary:")) && (leakType < 6);
+        do {
+          Reader in = new StringReader(line);
+          if (countHeader) {
+            countWords(analyzer, words, in);
+          }
+          line = reader.readLine();
+        } while (line.startsWith(" "));
+      }
+      if (leakType < 3) {
+        countWords(analyzer, words, reader);
+      }
+    } finally {
+      reader.close();
     }
-    reader.close();
 
     Vector v = new RandomAccessSparseVector(FEATURES);
     bias.addToVector("", 1, v);
@@ -271,7 +280,7 @@ public class TrainNewsGroups {
     return v;
   }
 
-  private static void countWords(Analyzer analyzer, Multiset<String> words, Reader in) throws IOException {
+  private static void countWords(Analyzer analyzer, Collection<String> words, Reader in) throws IOException {
     TokenStream ts = analyzer.tokenStream("text", in);
     ts.addAttribute(TermAttribute.class);
     while (ts.incrementToken()) {
@@ -280,7 +289,7 @@ public class TrainNewsGroups {
     }
   }
 
-  private static List<File> permute(List<File> files, Random rand) {
+  private static List<File> permute(Iterable<File> files, Random rand) {
     List<File> r = Lists.newArrayList();
     for (File file : files) {
       int i = rand.nextInt(r.size() + 1);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java Sat Sep 25 09:51:42 2010
@@ -34,7 +34,7 @@ import org.apache.mahout.common.StringUt
 /**
  * Separate the input data into a training and testing set.
  */
-public class DatasetSplit {
+public final class DatasetSplit {
 
   private static final String SEED = "traintest.seed";
 
@@ -119,7 +119,7 @@ public class DatasetSplit {
   }
 
   /**
-   * a {@link org.apache.hadoop.mapred.LineRecordReader LineRecordReader} that skips some lines from the
+   * a {@link RecordReader} that skips some lines from the
    * input. Uses a Random number generator with a specific seed to decide if a line will be skipped or not.
    */
   public static class RndLineRecordReader extends RecordReader<LongWritable, Text> {

Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java Sat Sep 25 09:51:42 2010
@@ -22,7 +22,6 @@ import com.google.common.base.Charsets;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Sets;
-import com.google.common.io.Files;
 import com.google.common.io.Resources;
 import org.apache.mahout.classifier.AbstractVectorClassifier;
 import org.apache.mahout.examples.MahoutTestCase;
@@ -30,18 +29,20 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.junit.Test;
 
-import java.io.File;
 import java.io.IOException;
 import java.util.List;
 import java.util.Set;
 
 public class TrainLogisticTest extends MahoutTestCase {
-  Splitter onWhiteSpace = Splitter.on(CharMatcher.BREAKING_WHITESPACE).trimResults().omitEmptyStrings();
+
+  private static final Splitter ON_WHITE_SPACE = 
+      Splitter.on(CharMatcher.BREAKING_WHITESPACE).trimResults().omitEmptyStrings();
+
   @Test
   public void testMain() throws IOException {
     String outputFile = "./model";
     String inputFile = "donut.csv";
-    String[] args = Iterables.toArray(onWhiteSpace.split(
+    String[] args = Iterables.toArray(ON_WHITE_SPACE.split(
       "--input " +
         inputFile +
         " --output " +
@@ -50,9 +51,9 @@ public class TrainLogisticTest extends M
         "--predictors x y --types numeric --features 20 --passes 100 --rate 50 "), String.class);
     TrainLogistic.main(args);
     LogisticModelParameters lmp = TrainLogistic.getParameters();
-    assertEquals(1e-4, lmp.getLambda(), 1e-9);
+    assertEquals(1.0e-4, lmp.getLambda(), 1.0e-9);
     assertEquals(20, lmp.getNumFeatures());
-    assertEquals(true, lmp.useBias());
+    assertTrue(lmp.useBias());
     assertEquals("color", lmp.getTargetVariable());
     CsvRecordFactory csv = lmp.getCsvRecordFactory();
     assertEquals("[1, 2]", Sets.newTreeSet(csv.getTargetCategories()).toString());

Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java Sat Sep 25 09:51:42 2010
@@ -25,6 +25,6 @@ package org.apache.mahout.examples;
 public abstract class MahoutTestCase extends org.apache.mahout.common.MahoutTestCase {
 
   /** "Close enough" value for floating-point comparisons. */
-  public static final double EPSILON = 0.0000001;
+  public static final double EPSILON = 0.000001;
 
 }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java Sat Sep 25 09:51:42 2010
@@ -573,15 +573,15 @@ public abstract class AbstractVector imp
     }
 
     public double get() {
-        return getQuick(index);
-      }
+      return getQuick(index);
+    }
 
-      public int index() {
-        return index;
-      }
+    public int index() {
+      return index;
+    }
 
-      public void set(double value) {
-        setQuick(index, value);
-      }
+    public void set(double value) {
+      setQuick(index, value);
+    }
   }
 }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java Sat Sep 25 09:51:42 2010
@@ -144,7 +144,7 @@ import java.util.Date;
  @version 1.0, 09/24/99
  @see java.util.Random
    */
-public class MersenneTwister extends RandomEngine {
+public final class MersenneTwister extends RandomEngine {
 
   /* Period parameters */
   private static final int N = 624;
@@ -206,7 +206,7 @@ public class MersenneTwister extends Ran
   }
 
   /** Generates N words at one time */
-  protected void nextBlock() {
+  void nextBlock() {
     int y;
     int kk;
 
@@ -248,7 +248,7 @@ public class MersenneTwister extends Ran
 
   /** Sets the receiver's seed. This method resets the receiver's entire internal state.
    * @param seed An integer that is used to reset the internal state of the generator */
-  protected void setSeed(int seed) {
+  void setSeed(int seed) {
     mt[0] = seed;
     for (int i = 1; i < N; i++) {
       mt[i] = 1812433253 * (mt[i - 1] ^ (mt[i - 1] >> 30)) + i;
@@ -275,7 +275,7 @@ public class MersenneTwister extends Ran
    * done in the 1999 reference implementation.  Should only be used for testing, not
    * actual coding.
    */
-  protected void setReferenceSeed(int seed) {
+  void setReferenceSeed(int seed) {
     for (int i = 0; i < N; i++) {
       mt[i] = seed & 0xffff0000;
       seed = 69069 * seed + 1;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java Sat Sep 25 09:51:42 2010
@@ -14,7 +14,7 @@ import java.util.Random;
 
 /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
 @Deprecated
-public class RandomSamplingAssistant extends PersistentObject {
+public final class RandomSamplingAssistant extends PersistentObject {
 
   private static final int MAX_BUFFER_SIZE = 200;
 

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java Sat Sep 25 09:51:42 2010
@@ -30,7 +30,7 @@ import java.util.Random;
 
 /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
 @Deprecated
-public class WeightedRandomSampler extends PersistentObject {
+public final class WeightedRandomSampler extends PersistentObject {
 
   //public class BlockedRandomSampler extends Object implements java.io.Serializable {
   private int skip;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java Sat Sep 25 09:51:42 2010
@@ -672,9 +672,10 @@ public abstract class DoubleMatrix2D ext
    * @param columnIndexes The columns of the cells that shall be visible in the new view. To indicate that <i>all</i>
    *                      columns shall be visible, simply set this parameter to <tt>null</tt>.
    * @return the new view.
-   * @throws IndexOutOfBoundsException if <tt>!(0 <= rowIndexes[i] < rows())</tt> for any <tt>i=0..rowIndexes.length()-1</tt>.
-   * @throws IndexOutOfBoundsException if <tt>!(0 <= columnIndexes[i] < columns())</tt> for any
-   *                                   <tt>i=0..columnIndexes.length()-1</tt>.
+   * @throws IndexOutOfBoundsException
+   *  if <tt>!(0 <= rowIndexes[i] < rows())</tt> for any <tt>i=0..rowIndexes.length()-1</tt>.
+   * @throws IndexOutOfBoundsException
+   *  if <tt>!(0 <= columnIndexes[i] < columns())</tt> for any <tt>i=0..columnIndexes.length()-1</tt>.
    */
   public DoubleMatrix2D viewSelection(int[] rowIndexes, int[] columnIndexes) {
     // check for "all"
@@ -792,13 +793,15 @@ public abstract class DoubleMatrix2D ext
    *
    * // 8 neighbors org.apache.mahout.math.function.Double9Function f = new Double9Function() {
    * &nbsp;&nbsp;&nbsp;public final double apply( &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double a00, double a01, double
-   * a02, &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double a10, double a11, double a12, &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double
+   * a02, &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double a10, double a11, double a12,
+   * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double
    * a20, double a21, double a22) { &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return beta*a11 +
    * alpha*(a00+a01+a02 + a10+a12 + a20+a21+a22); &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;} }; A.zAssign8Neighbors(B,f);
    *
    * // 4 neighbors org.apache.mahout.math.function.Double9Function g = new Double9Function() {
    * &nbsp;&nbsp;&nbsp;public final double apply( &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double a00, double a01, double
-   * a02, &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double a10, double a11, double a12, &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double
+   * a02, &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double a10, double a11, double a12,
+   * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;double
    * a20, double a21, double a22) { &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return beta*a11 + alpha*(a01+a10+a12+a21);
    * &nbsp;&nbsp;&nbsp;} C.zAssign8Neighbors(B,g); // fast, even though it doesn't look like it }; </pre>
    *
@@ -848,9 +851,12 @@ public abstract class DoubleMatrix2D ext
     }
   }
 
-  /** Linear algebraic matrix-vector multiplication; <tt>z = A * y</tt>; Equivalent to <tt>return A.zMult(y,z,1,0);</tt> */
+  /**
+   * Linear algebraic matrix-vector multiplication; <tt>z = A * y</tt>;
+   * Equivalent to <tt>return A.zMult(y,z,1,0);</tt>
+   */
   public DoubleMatrix1D zMult(DoubleMatrix1D y, DoubleMatrix1D z) {
-    return zMult(y, z, 1, (z == null ? 1 : 0), false);
+    return zMult(y, z, 1, z == null ? 1 : 0, false);
   }
 
   /**
@@ -891,7 +897,7 @@ public abstract class DoubleMatrix2D ext
    * <tt>A.zMult(B,C,1,0,false,false)</tt>.
    */
   public DoubleMatrix2D zMult(DoubleMatrix2D B, DoubleMatrix2D C) {
-    return zMult(B, C, 1, (C == null ? 1 : 0), false, false);
+    return zMult(B, C, 1, C == null ? 1 : 0, false, false);
   }
 
   /**

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java Sat Sep 25 09:51:42 2010
@@ -9,8 +9,10 @@ It is provided "as is" without expressed
 package org.apache.mahout.math.matrix.impl;
 
 /**
- Abstract base class for 1-d matrices (aka <i>vectors</i>) holding objects or primitive data types such as <code>int</code>, <code>double</code>, etc.
- First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ Abstract base class for 1-d matrices (aka <i>vectors</i>) holding objects or primitive data types such as
+ <code>int</code>, <code>double</code>, etc.
+ First see the <a href="package-summary.html">package summary</a> and javadoc
+ <a href="package-tree.html">tree view</a> to get the broad picture.
  <p>
  <b>Note that this implementation is not synchronized.</b>
 
@@ -53,7 +55,7 @@ public abstract class AbstractMatrix1D e
    * @param rank the relative rank of the element.
    * @return the absolute rank of the element.
    */
-  protected int _rank(int rank) {
+  protected int rank(int rank) {
     return zero + rank * stride;
     //return zero + ((rank+flipMask)^flipMask);
     //return zero + rank*flip; // slower
@@ -98,22 +100,22 @@ public abstract class AbstractMatrix1D e
   /**
    * Sanity check for operations requiring two matrices with the same size.
    *
-   * @throws IllegalArgumentException if <tt>size() != B.size()</tt>.
+   * @throws IllegalArgumentException if <tt>size() != b.size()</tt>.
    */
-  protected void checkSize(double[] B) {
-    if (size != B.length) {
-      throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + B.length);
+  protected void checkSize(double[] b) {
+    if (size != b.length) {
+      throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + b.length);
     }
   }
 
   /**
    * Sanity check for operations requiring two matrices with the same size.
    *
-   * @throws IllegalArgumentException if <tt>size() != B.size()</tt>.
+   * @throws IllegalArgumentException if <tt>size() != b.size()</tt>.
    */
-  public void checkSize(AbstractMatrix1D B) {
-    if (size != B.size) {
-      throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + B.size);
+  public void checkSize(AbstractMatrix1D b) {
+    if (size != b.size) {
+      throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + b.size);
     }
   }
 
@@ -124,7 +126,7 @@ public abstract class AbstractMatrix1D e
    * @param rank the rank of the element.
    */
   protected int index(int rank) {
-    return offset(_rank(rank));
+    return offset(rank(rank));
   }
 
   /**

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java Sat Sep 25 09:51:42 2010
@@ -16,7 +16,7 @@ import org.apache.mahout.math.matrix.Dou
 
 /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
 @Deprecated
-public class Algebra extends PersistentObject {
+public final class Algebra extends PersistentObject {
 
   /**
    * A default Algebra object; has {@link Property#DEFAULT} attached for tolerance. Allows ommiting to construct an
@@ -85,7 +85,7 @@ public class Algebra extends PersistentO
   }
 
   /** Returns sqrt(a^2 + b^2) without under/overflow. */
-  protected static double hypot(double a, double b) {
+  static double hypot(double a, double b) {
     double r;
     if (Math.abs(a) > Math.abs(b)) {
       r = b / a;
@@ -280,7 +280,7 @@ public class Algebra extends PersistentO
    * @return X; a new independent matrix; solution if A is square, least squares solution otherwise.
    */
   public static DoubleMatrix2D solve(DoubleMatrix2D A, DoubleMatrix2D B) {
-    return (A.rows() == A.columns() ? (lu(A).solve(B)) : (qr(A).solve(B)));
+    return A.rows() == A.columns() ? (lu(A).solve(B)) : (qr(A).solve(B));
   }
 
   /**
@@ -288,7 +288,7 @@ public class Algebra extends PersistentO
    *
    * @return <tt>A</tt> (for convenience only).
    */
-  protected static DoubleMatrix2D trapezoidalLower(DoubleMatrix2D A) {
+  static DoubleMatrix2D trapezoidalLower(DoubleMatrix2D A) {
     int rows = A.rows();
     int columns = A.columns();
     for (int r = rows; --r >= 0;) {

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java Sat Sep 25 09:51:42 2010
@@ -14,9 +14,11 @@ import org.apache.mahout.math.list.IntAr
 import org.apache.mahout.math.matrix.DoubleMatrix1D;
 import org.apache.mahout.math.matrix.DoubleMatrix2D;
 
+import java.io.Serializable;
+
 /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
 @Deprecated
-public class LUDecompositionQuick implements java.io.Serializable {
+public class LUDecompositionQuick implements Serializable {
 
   /** Array for internal storage of decomposition. */
   private DoubleMatrix2D lu;

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java Sat Sep 25 09:51:42 2010
@@ -32,7 +32,7 @@ import org.junit.Before;
 public abstract class MahoutTestCase extends Assert {
 
   /** "Close enough" value for floating-point comparisons. */
-  public static final double EPSILON = 0.0000001;
+  public static final double EPSILON = 0.000001;
   
   private File testTempDir;
 

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java Sat Sep 25 09:51:42 2010
@@ -36,7 +36,7 @@ public final class TestSingularValueDeco
       {  -360.0 / 625.0,  192.0 / 625.0, 1756.0 / 625.0 },
   };
   
-  private static final double normTolerance = 10.0e-14;
+  private static final double NORM_TOLERANCE = 10.0e-14;
   
   @Test
   public void testMoreRows() {
@@ -158,7 +158,7 @@ public final class TestSingularValueDeco
     }
     
     double norm = Algebra.getNorm(u.times(s).times(v.transpose()).minus(matrix));
-    assertEquals(0, norm, normTolerance);
+    assertEquals(0, norm, NORM_TOLERANCE);
     
   }
   
@@ -184,7 +184,7 @@ public final class TestSingularValueDeco
     for (int i = 0; i < mTm.numRows(); i++) {
       id.set(i, i, 1);
     }
-    assertEquals(0, Algebra.getNorm(mTm.minus(id)), normTolerance);
+    assertEquals(0, Algebra.getNorm(mTm.minus(id)), NORM_TOLERANCE);
   }
   
   /** test matrices values */
@@ -208,11 +208,11 @@ public final class TestSingularValueDeco
     // check values against known references
     Matrix u = svd.getU();
     
-    assertEquals(0,  Algebra.getNorm(u.minus(uRef)), normTolerance);
+    assertEquals(0,  Algebra.getNorm(u.minus(uRef)), NORM_TOLERANCE);
     Matrix s = svd.getS();
-    assertEquals(0,  Algebra.getNorm(s.minus(sRef)), normTolerance);
+    assertEquals(0,  Algebra.getNorm(s.minus(sRef)), NORM_TOLERANCE);
     Matrix v = svd.getV();
-    assertEquals(0,  Algebra.getNorm(v.minus(vRef)), normTolerance);
+    assertEquals(0,  Algebra.getNorm(v.minus(vRef)), NORM_TOLERANCE);
   }
   
   

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java Sat Sep 25 09:51:42 2010
@@ -143,7 +143,7 @@ public final class TestHebbianSolver ext
     _eigensVectorFactory = new DenseMapVectorFactory();
     int desiredRank = 200;
     long time = timeSolver(TMP_EIGEN_DIR,
-                           0.00001, 
+                           0.00001,
                            5, 
                            desiredRank,
                            new TrainingState());

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Sat Sep 25 09:51:42 2010
@@ -90,37 +90,37 @@ public final class SequenceFileDumper {
         } else {
           writer = new OutputStreamWriter(System.out);
         }
-        writer.append("Input Path: ").append(String.valueOf(path)).append('\n');
-        
-        int sub = Integer.MAX_VALUE;
-        if (cmdLine.hasOption(substringOpt)) {
-          sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
-        }
-        boolean countOnly = cmdLine.hasOption(countOpt);
-        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
-        Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
-        writer.append("Key class: ").append(String.valueOf(reader.getKeyClass()));
-        writer.append(" Value Class: ").append(String.valueOf(value.getClass())).append('\n');
-        writer.flush();
-        long count = 0;
-        if (countOnly) {
-          while (reader.next(key, value)) {
-            count++;
+        try {
+          writer.append("Input Path: ").append(String.valueOf(path)).append('\n');
+
+          int sub = Integer.MAX_VALUE;
+          if (cmdLine.hasOption(substringOpt)) {
+            sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
           }
-          writer.append("Count: ").append(String.valueOf(count)).append('\n');
-        } else {
-          while (reader.next(key, value)) {
-            writer.append("Key: ").append(String.valueOf(key));
-            String str = value.toString();
-            writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
-            writer.write('\n');
-            writer.flush();
-            count++;
+          boolean countOnly = cmdLine.hasOption(countOpt);
+          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+          Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
+          writer.append("Key class: ").append(String.valueOf(reader.getKeyClass()));
+          writer.append(" Value Class: ").append(String.valueOf(value.getClass())).append('\n');
+          writer.flush();
+          long count = 0;
+          if (countOnly) {
+            while (reader.next(key, value)) {
+              count++;
+            }
+            writer.append("Count: ").append(String.valueOf(count)).append('\n');
+          } else {
+            while (reader.next(key, value)) {
+              writer.append("Key: ").append(String.valueOf(key));
+              String str = value.toString();
+              writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
+              writer.write('\n');
+              writer.flush();
+              count++;
+            }
+            writer.append("Count: ").append(String.valueOf(count)).append('\n');
           }
-          writer.append("Count: ").append(String.valueOf(count)).append('\n');
-        }
-        writer.flush();
-        if (cmdLine.hasOption(outputOpt)) {
+        } finally {
           writer.close();
         }
       }

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Sat Sep 25 09:51:42 2010
@@ -73,13 +73,13 @@ public final class ClusterDumper extends
 
   private Path seqFileDir;
 
-  private Path pointsDir = null;
+  private Path pointsDir;
 
-  private String termDictionary = null;
+  private String termDictionary;
 
-  private String dictionaryFormat = null;
+  private String dictionaryFormat;
 
-  private String outputFile = null;
+  private String outputFile;
 
   private int subString = Integer.MAX_VALUE;
 
@@ -109,9 +109,11 @@ public final class ClusterDumper extends
     addOption(OUTPUT_OPTION, "o", "Optional output directory. Default is to output to the console.");
     addOption(SUBSTRING_OPTION, "b", "The number of chars of the asFormatString() to print");
     addOption(NUM_WORDS_OPTION, "n", "The number of top terms to print");
-    addOption(JSON_OPTION, "j", "Output the centroid as JSON.  Otherwise it substitues in the terms for vector cell entries");
-    addOption(POINTS_DIR_OPTION, "p", "The directory containing points sequence files mapping input vectors to their cluster.  "
-        + "If specified, then the program will output the points associated with a cluster");
+    addOption(JSON_OPTION, "j",
+        "Output the centroid as JSON.  Otherwise it substitues in the terms for vector cell entries");
+    addOption(POINTS_DIR_OPTION, "p",
+        "The directory containing points sequence files mapping input vectors to their cluster.  "
+            + "If specified, then the program will output the points associated with a cluster");
     addOption(DICTIONARY_OPTION, "d", "The dictionary file");
     addOption(DICTIONARY_TYPE_OPTION, "dt", "The dictionary file type (text|sequencefile)", "text");
     if (parseArguments(args) == null) {
@@ -256,11 +258,8 @@ public final class ClusterDumper extends
     return this.numTopFeatures;
   }
 
-  private void setUseJSON(boolean json) {
-    this.useJSON = json;
-  }
-
-  private static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir, Configuration conf) throws IOException {
+  private static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir,
+                                                                       Configuration conf) throws IOException {
     Map<Integer, List<WeightedVectorWritable>> result = new TreeMap<Integer, List<WeightedVectorWritable>>();
 
     FileSystem fs = pointsPathDir.getFileSystem(conf);

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java Sat Sep 25 09:51:42 2010
@@ -29,8 +29,7 @@ import org.apache.mahout.math.Varint;
 import org.apache.mahout.utils.nlp.collocations.llr.Gram.Type;
 
 /** A GramKey, based on the identity fields of Gram (type, string) plus a byte[] used for secondary ordering */
-public class GramKey extends BinaryComparable implements
-    WritableComparable<BinaryComparable> {
+public final class GramKey extends BinaryComparable implements WritableComparable<BinaryComparable> {
 
   private int primaryLength;
   private int length;

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java Sat Sep 25 09:51:42 2010
@@ -18,7 +18,6 @@
 package org.apache.mahout.utils.vectors.common;
 
 import java.io.IOException;
-import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -71,13 +70,10 @@ public final class PartialVectorMerger {
    * @param dimension
    * @param sequentialAccess
    *          output vectors should be optimized for sequential access
-   * @param namedVectors
+   * @param namedVector
    *          output vectors should be named, retaining key (doc id) as a label
    * @param numReducers 
    *          The number of reducers to spawn
-   * @throws IOException
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
    */
   public static void mergePartialVectors(Iterable<Path> partialVectorPaths,
                                          Path output,
@@ -122,7 +118,7 @@ public final class PartialVectorMerger {
   }
 
   private static String getCommaSeparatedPaths(Iterable<Path> paths) {
-    StringBuilder commaSeparatedPaths = new StringBuilder();
+    StringBuilder commaSeparatedPaths = new StringBuilder(100);
     String sep = "";
     for (Path path : paths) {
       commaSeparatedPaths.append(sep).append(path.toString());

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Sat Sep 25 09:51:42 2010
@@ -29,7 +29,6 @@ import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
@@ -180,8 +179,8 @@ public class ClusterLabels {
   /**
    * Get the list of labels, sorted by best score.
    */
-  protected List<TermInfoClusterInOut> getClusterLabels(Integer integer, Collection<WeightedVectorWritable> wvws)
-      throws IOException {
+  protected List<TermInfoClusterInOut> getClusterLabels(Integer integer,
+                                                        Collection<WeightedVectorWritable> wvws) throws IOException {
 
     if (wvws.size() < minNumIds) {
       log.info("Skipping small cluster {} with size: {}", integer, wvws.size());
@@ -267,8 +266,9 @@ public class ClusterLabels {
     return clusteredTermInfo.subList(0, Math.min(clusteredTermInfo.size(), maxLabels));
   }
 
-  private static OpenBitSet getClusterDocBitset(IndexReader reader, Collection<String> idSet, String idField)
-      throws IOException {
+  private static OpenBitSet getClusterDocBitset(IndexReader reader,
+                                                Collection<String> idSet,
+                                                String idField) throws IOException {
     int numDocs = reader.numDocs();
 
     OpenBitSet bitset = new OpenBitSet(numDocs);

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java Sat Sep 25 09:51:42 2010
@@ -141,8 +141,8 @@ public final class Driver {
       if (cmdLine.hasOption(inputOpt)) { // Lucene case
         File file = new File(cmdLine.getValue(inputOpt).toString());
         if (!file.isDirectory()) {
-          throw new IllegalArgumentException("Lucene directory: " + file.getAbsolutePath() + 
-              " does not exist or is not a directory");
+          throw new IllegalArgumentException("Lucene directory: " + file.getAbsolutePath()
+              +  " does not exist or is not a directory");
         }
 
         long maxDocs = Long.MAX_VALUE;

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java?rev=1001180&r1=1001179&r2=1001180&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java Sat Sep 25 09:51:42 2010
@@ -25,6 +25,6 @@ package org.apache.mahout.utils;
 public abstract class MahoutTestCase extends org.apache.mahout.common.MahoutTestCase {
 
   /** "Close enough" value for floating-point comparisons. */
-  public static final double EPSILON = 0.0000001;
+  public static final double EPSILON = 0.000001;
 
 }