You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/08/30 20:41:48 UTC

svn commit: r990892 [2/2] - in /mahout/trunk: buildtools/src/main/resources/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ core/src/main/java/org/apache/mahout/clusterin...

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java Mon Aug 30 18:41:46 2010
@@ -107,8 +107,7 @@ public class WikipediaMapper extends Map
       throw new IllegalStateException(ex);
     }
     log.info("Configure: Input Categories size: {} All: {} Exact Match: {}",
-             new Object[] { inputCategories.size(), all,
-        exactMatchOnly });
+             new Object[] {inputCategories.size(), all, exactMatchOnly});
   }
 
   private static String getDocument(String xml) {

Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java Mon Aug 30 18:41:46 2010
@@ -33,7 +33,7 @@ import org.apache.mahout.ga.watchmaker.c
 
 public class CDReducerTest extends MahoutTestCase {
 
-  private static final int nbevals = 100;
+  private static final int NUM_EVALS = 100;
 
   private List<CDFitness> evaluations;
 
@@ -49,7 +49,7 @@ public class CDReducerTest extends Mahou
     int fp = 0;
     int tn = 0;
     int fn = 0;
-    for (int index = 0; index < nbevals; index++) {
+    for (int index = 0; index < NUM_EVALS; index++) {
       CDFitness fitness = new CDFitness(rng.nextInt(100), rng.nextInt(100), rng.nextInt(100), rng.nextInt(100));
       tp += fitness.getTp();
       fp += fitness.getFp();

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java Mon Aug 30 18:41:46 2010
@@ -30,8 +30,10 @@ import java.util.Date;
 /**
  * Quick medium quality uniform pseudo-random number generator.
  *
- * Produces uniformly distributed <tt>int</tt>'s and <tt>long</tt>'s in the closed intervals <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> and <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt>, respectively, 
- * as well as <tt>float</tt>'s and <tt>double</tt>'s in the open unit intervals <tt>(0.0f,1.0f)</tt> and <tt>(0.0,1.0)</tt>, respectively.
+ * Produces uniformly distributed <tt>int</tt>'s and <tt>long</tt>'s in the closed intervals
+ * <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> and <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt>, respectively,
+ * as well as <tt>float</tt>'s and <tt>double</tt>'s in the open unit intervals <tt>(0.0f,1.0f)</tt>
+ * and <tt>(0.0,1.0)</tt>, respectively.
  * <p>
  * The seed can be any integer satisfying <tt>0 &lt; 4*seed+1 &lt; 2<sup>32</sup></tt>.
  * In other words, there must hold <tt>seed &gt;= 0 && seed &lt; 1073741823</tt>.
@@ -83,7 +85,8 @@ public class DRand extends RandomEngine 
   }
 
   /**
-   * Returns a 32 bit uniformly distributed random number in the closed interval <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt>
+   * Returns a 32 bit uniformly distributed random number in the closed interval
+   * <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt>
    * (including <tt>Integer.MIN_VALUE</tt> and <tt>Integer.MAX_VALUE</tt>).
    */
   @Override

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java Mon Aug 30 18:41:46 2010
@@ -16,9 +16,12 @@ import org.apache.mahout.math.list.IntAr
 /**
  * Conveniently computes a stable subsequence of elements from a given input sequence;
  * Picks (samples) exactly one random element from successive blocks of <tt>weight</tt> input elements each.
- * For example, if weight==2 (a block is 2 elements), and the input is 5*2=10 elements long, then picks 5 random elements from the 10 elements such that
- * one element is randomly picked from the first block, one element from the second block, ..., one element from the last block.
- * weight == 1.0 --> all elements are picked (sampled). weight == 10.0 --> Picks one random element from successive blocks of 10 elements each. Etc.
+ * For example, if weight==2 (a block is 2 elements), and the input is 5*2=10 elements long,
+ * then picks 5 random elements from the 10 elements such that
+ * one element is randomly picked from the first block, one element from the second block, ...,
+ * one element from the last block.
+ * weight == 1.0 --> all elements are picked (sampled). weight == 10.0 --> Picks one random
+ * element from successive blocks of 10 elements each. Etc.
  * The subsequence is guaranteed to be <i>stable</i>, i.e. elements never change position relative to each other.
  *
  */

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java Mon Aug 30 18:41:46 2010
@@ -46,25 +46,25 @@ class KnownDoubleQuantileEstimator exten
 
   protected RandomSamplingAssistant samplingAssistant;
   protected final double samplingRate; // see method sampleNextElement()
-  protected final long N; // see method sampleNextElement()
+  protected final long n; // see method sampleNextElement()
 
   /**
    * Constructs an approximate quantile finder with b buffers, each having k elements.
    *
    * @param b            the number of buffers
    * @param k            the number of elements per buffer
-   * @param N            the total number of elements over which quantiles are to be computed.
+   * @param n            the total number of elements over which quantiles are to be computed.
    * @param samplingRate 1.0 --> all elements are consumed. 10.0 --> Consumes one random element from successive blocks
    *                     of 10 elements each. Etc.
    * @param generator    a uniform random number generator.
    */
-  KnownDoubleQuantileEstimator(int b, int k, long N, double samplingRate, RandomEngine generator) {
+  KnownDoubleQuantileEstimator(int b, int k, long n, double samplingRate, RandomEngine generator) {
     this.samplingRate = samplingRate;
-    this.N = N;
+    this.n = n;
 
     this.samplingAssistant = this.samplingRate <= 1.0
         ? null
-        : new RandomSamplingAssistant(Arithmetic.floor(N / samplingRate), N, generator);
+        : new RandomSamplingAssistant(Arithmetic.floor(n / samplingRate), n, generator);
 
     setUp(b, k);
     this.clear();
@@ -123,7 +123,7 @@ class KnownDoubleQuantileEstimator exten
     RandomSamplingAssistant assist = this.samplingAssistant;
     if (assist != null) {
       this.samplingAssistant =
-          new RandomSamplingAssistant(Arithmetic.floor(N / samplingRate), N, assist.getRandomGenerator());
+          new RandomSamplingAssistant(Arithmetic.floor(n / samplingRate), n, assist.getRandomGenerator());
     }
   }
 
@@ -194,17 +194,19 @@ class KnownDoubleQuantileEstimator exten
    */
   @Override
   public DoubleArrayList quantileElements(DoubleArrayList phis) {
-    /*
-  * The KNOWN quantile finder reads off quantiles from FULL buffers only.
-  * Since there might be a partially full buffer, this method first satisfies this constraint by temporarily filling a few +infinity, -infinity elements to make up a full block.
-  * This is in full conformance with the explicit approximation guarantees.
-   *
-   * For those of you working on online apps:
-    * The approximation guarantees are given for computing quantiles AFTER N elements have been filled, not for intermediate displays.
-  * If you have one thread filling and another thread displaying concurrently, you will note that in the very beginning the infinities will dominate the display.
-   * This could confuse users, because, of course, they don't expect any infinities, even if they "disappear" after a short while.
-   * To prevent panic exclude phi's close to zero or one in the early phases of processing.
-    */
+    // The KNOWN quantile finder reads off quantiles from FULL buffers only.
+    // Since there might be a partially full buffer, this method first satisfies this constraint by 
+    // temporarily filling a few +infinity, -infinity elements to make up a full block.
+    // This is in full conformance with the explicit approximation guarantees.
+    //
+    // For those of you working on online apps:
+    // The approximation guarantees are given for computing quantiles AFTER N elements have
+    // been filled, not for intermediate displays.
+    // If you have one thread filling and another thread displaying concurrently, you will
+    // note that in the very beginning the infinities will dominate the display.
+    // This could confuse users, because, of course, they don't expect any infinities,
+    // even if they "disappear" after a short while.
+    // To prevent panic exclude phi's close to zero or one in the early phases of processing.
     DoubleBuffer partial = this.bufferSet._getPartialBuffer();
     int missingValues = 0;
     if (partial != null) { // any auxiliary infinities needed?

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java Mon Aug 30 18:41:46 2010
@@ -90,7 +90,8 @@ class UnknownDoubleQuantileEstimator ext
 
     sortAscendingByLevel(fullBuffers);
 
-    // if there is only one buffer at the lowest level, then increase its level so that there are at least two at the lowest level.
+    // if there is only one buffer at the lowest level, then increase its level
+    // so that there are at least two at the lowest level.
     int minLevel = fullBuffers[1].level();
     if (fullBuffers[0].level() < minLevel) {
       fullBuffers[0].level(minLevel);

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java Mon Aug 30 18:41:46 2010
@@ -43,7 +43,7 @@ public abstract class AbstractMatrix1D e
    * @param absRank the absolute rank of the element.
    * @return the position.
    */
-  protected int _offset(int absRank) {
+  protected int offset(int absRank) {
     return absRank;
   }
 
@@ -124,7 +124,7 @@ public abstract class AbstractMatrix1D e
    * @param rank the rank of the element.
    */
   protected int index(int rank) {
-    return _offset(_rank(rank));
+    return offset(_rank(rank));
   }
 
   /**

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java Mon Aug 30 18:41:46 2010
@@ -77,7 +77,7 @@ class SelectedDenseDoubleMatrix1D extend
    * @return the position.
    */
   @Override
-  protected int _offset(int absRank) {
+  protected int offset(int absRank) {
     return offsets[absRank];
   }
 

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java Mon Aug 30 18:41:46 2010
@@ -80,7 +80,7 @@ class SelectedSparseDoubleMatrix1D exten
    * @return the position.
    */
   @Override
-  protected int _offset(int absRank) {
+  protected int offset(int absRank) {
     return offsets[absRank];
   }
 

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java Mon Aug 30 18:41:46 2010
@@ -31,11 +31,11 @@ public class LUDecomposition implements 
    * Constructs and returns a new LU Decomposition object; The decomposed matrices can be retrieved via instance methods
    * of the returned decomposition object.
    *
-   * @param A Rectangular matrix
+   * @param a Rectangular matrix
    */
-  public LUDecomposition(DoubleMatrix2D A) {
+  public LUDecomposition(DoubleMatrix2D a) {
     quick = new LUDecompositionQuick(0); // zero tolerance for compatibility with Jama
-    quick.decompose(A.copy());
+    quick.decompose(a.copy());
   }
 
   /**
@@ -52,9 +52,11 @@ public class LUDecomposition implements 
    *
    * @return (double) piv
    */
+  /*
   private double[] getDoublePivot() {
     return quick.getDoublePivot();
   }
+   */
 
   /**
    * Returns the lower triangular factor, <tt>L</tt>.
@@ -95,17 +97,17 @@ public class LUDecomposition implements 
   /**
    * Solves <tt>A*X = B</tt>.
    *
-   * @param B A matrix with as many rows as <tt>A</tt> and any number of columns.
+   * @param b A matrix with as many rows as <tt>A</tt> and any number of columns.
    * @return <tt>X</tt> so that <tt>L*U*X = B(piv,:)</tt>.
    * @throws IllegalArgumentException if </tt>B.rows() != A.rows()</tt>.
    * @throws IllegalArgumentException if A is singular, that is, if <tt>!this.isNonsingular()</tt>.
    * @throws IllegalArgumentException if <tt>A.rows() < A.columns()</tt>.
    */
 
-  public DoubleMatrix2D solve(DoubleMatrix2D B) {
-    DoubleMatrix2D X = B.copy();
-    quick.solve(X);
-    return X;
+  public DoubleMatrix2D solve(DoubleMatrix2D b) {
+    DoubleMatrix2D x = b.copy();
+    quick.solve(x);
+    return x;
   }
 
   /**

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java Mon Aug 30 18:41:46 2010
@@ -39,7 +39,8 @@ import org.apache.mahout.math.list.Doubl
  * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.105.1580
  */
 public class OnlineSummarizer {
-  boolean sorted = true;
+
+  private boolean sorted = true;
 
   // the first several samples are kept so we can boot-strap our estimates cleanly
   private DoubleArrayList starter = new DoubleArrayList(100);
@@ -67,11 +68,11 @@ public class OnlineSummarizer {
       starter.add(sample);
     } else if (n == 100) {
       starter.add(sample);
-      q[0] = min();
-      q[1] = quartile(1);
-      q[2] = quartile(2);
-      q[3] = quartile(3);
-      q[4] = max();
+      q[0] = getMin();
+      q[1] = getQuartile(1);
+      q[2] = getQuartile(2);
+      q[3] = getQuartile(3);
+      q[4] = getMax();
       starter = null;
     } else {
       q[0] = Math.min(sample, q[0]);
@@ -92,19 +93,19 @@ public class OnlineSummarizer {
     }
   }
 
-  public int count() {
+  public int getCount() {
     return n;
   }
 
-  public double mean() {
+  public double getMean() {
     return mean;
   }
 
-  public double sd() {
+  public double getSD() {
     return Math.sqrt(variance);
   }
 
-  public double min() {
+  public double getMin() {
     sort();
     if (n == 0) {
       throw new IllegalArgumentException("Must have at least one sample to estimate minimum value");
@@ -119,7 +120,7 @@ public class OnlineSummarizer {
     }
   }
 
-  public double max() {
+  public double getMax() {
     sort();
     if (n == 0) {
       throw new IllegalArgumentException("Must have at least one sample to estimate maximum value");
@@ -127,11 +128,11 @@ public class OnlineSummarizer {
     return n <= 100 ? starter.get(99) : q[4];
   }
 
-  public double quartile(int i) {
+  public double getQuartile(int i) {
     sort();
     switch (i) {
       case 0:
-        return min();
+        return getMin();
       case 1:
       case 2:
       case 3:
@@ -146,13 +147,13 @@ public class OnlineSummarizer {
           return starter.get(k) * (1 - u) + starter.get(k + 1) * u;
         }
       case 4:
-        return max();
+        return getMax();
       default:
         throw new IllegalArgumentException("Quartile number must be in the range [0..4] not " + i);
     }
   }
 
-  public double median() {
-    return quartile(2);
+  public double getMedian() {
+    return getQuartile(2);
   }
 }

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java Mon Aug 30 18:41:46 2010
@@ -65,10 +65,8 @@ public abstract class MahoutTestCase ext
   private File getTestTempFileOrDir(String name, boolean dir) throws IOException {
     File f = new File(getTestTempDir(), name);
     f.deleteOnExit();
-    if (dir) {
-      if (!f.mkdirs()) {
-        throw new IOException("Could not make directory " + f);
-      }
+    if (dir && !f.mkdirs()) {
+      throw new IOException("Could not make directory " + f);
     }
     return f;
   }

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java Mon Aug 30 18:41:46 2010
@@ -35,6 +35,7 @@ import java.util.Date;
  * uses a legacy method to initialize the PRNG state. <ul>
  */
 public class MersenneTwisterTest {
+
   /**
    * Convert an unsigned int stored in a long to a double in a fashion compatible with the C
    * reference implementation
@@ -42,12 +43,12 @@ public class MersenneTwisterTest {
    * @param y The long value to convert
    * @return A double in the range [0..1)
    */
-  private double toDouble(long y) {
+  private static double toDouble(long y) {
     return (double) (y & 0xffffffffL) * 2.3283064365386963e-10;
   }
 
   @Test
-  public void test1000_1() {
+  public void test10001() {
     MersenneTwister r = new MersenneTwister();
     r.setReferenceSeed(4357);
 
@@ -62,7 +63,7 @@ public class MersenneTwisterTest {
     r.setReferenceSeed(4357);
     i = 0;
     for (Double x : ref1) {
-      Assert.assertEquals("t-ref-double-" + i, x, toDouble(r.nextInt()), 1e-7);
+      Assert.assertEquals("t-ref-double-" + i, x, toDouble(r.nextInt()), 1.0e-7);
       i++;
     }
   }
@@ -72,7 +73,7 @@ public class MersenneTwisterTest {
     MersenneTwister r = new MersenneTwister(42);
     int i = 0;
     for (double x : reference3) {
-      Assert.assertEquals("t-regression-"+i, x, r.nextDouble(), 1e-7);
+      Assert.assertEquals("t-regression-"+i, x, r.nextDouble(), 1.0e-7);
       i++;
     }
 
@@ -108,7 +109,7 @@ public class MersenneTwisterTest {
 
   // output of first 1000 values of nextInt() as determined by a slight modification of
   // the reference C implementation
-  private long[] reference1 = new long[]{
+  private final long[] reference1 = {
           -1427748157, 1585203162, -1181843167, -1341066457, -1831172428,
           -812701500, 1164297043, -696771727, 589972756, -182733429,
           767115311, -201891849, 1322433849, -937881972, -994918828,
@@ -313,7 +314,7 @@ public class MersenneTwisterTest {
 
   // the first 1000 values output by the reference implementation
   // data from http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/991029/mt19937-2.out
-  private double[] ref1 = new double[]{
+  private final double[] ref1 = {
           0.66757648, 0.36908387, 0.72483069, 0.68775863, 0.57364694,
           0.81077819, 0.27108403, 0.83777019, 0.13736374, 0.95745406,
           0.17860795, 0.95299339, 0.30790312, 0.78163234, 0.76835241,
@@ -516,7 +517,7 @@ public class MersenneTwisterTest {
           0.44807063, 0.06424586, 0.75766097, 0.40567560, 0.23996701
   };
 
-  private double[] reference3 = new double[]{
+  private final double[] reference3 = {
           0.09575212, 0.03326592, 0.10697744, 0.37992924, 0.13844205,
           0.07550350, 0.89109388, 0.50151205, 0.79222822, 0.41823034,
           0.84440729, 0.66568635, 0.87830555, 0.17024311, 0.43956848,

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java Mon Aug 30 18:41:46 2010
@@ -17,8 +17,6 @@
 
 package org.apache.mahout.math.stats;
 
-import org.apache.mahout.math.jet.random.Gamma;
-import org.apache.mahout.math.jet.random.engine.MersenneTwister;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -28,13 +26,13 @@ public class OnlineSummarizerTest {
   @Test
   public void testCount() {
     OnlineSummarizer x = new OnlineSummarizer();
-    Assert.assertEquals(0, x.count());
+    Assert.assertEquals(0, x.getCount());
     x.add(1);
-    Assert.assertEquals(1, x.count());
+    Assert.assertEquals(1, x.getCount());
 
     for (int i = 2; i < 110; i++) {
       x.add(i);
-      Assert.assertEquals(i, x.count());
+      Assert.assertEquals(i, x.getCount());
     }
   }
 
@@ -80,17 +78,17 @@ public class OnlineSummarizerTest {
 
   private static void check(OnlineSummarizer x, double... values) {
     for (int i = 0; i < 5; i++) {
-      checkRange("quartile " + i, x.quartile(i), values[2 * i], values[2 * i + 1]);
+      checkRange("quartile " + i, x.getQuartile(i), values[2 * i], values[2 * i + 1]);
     }
-    Assert.assertEquals(x.quartile(2), x.median(), 0);
+    Assert.assertEquals(x.getQuartile(2), x.getMedian(), 0);
 
-    checkRange("mean", x.mean(), values[10], values[11]);
-    checkRange("sd", x.sd(), values[12], values[13]);
+    checkRange("mean", x.getMean(), values[10], values[11]);
+    checkRange("sd", x.getSD(), values[12], values[13]);
   }
 
   private static void checkRange(String msg, double v, double low, double high) {
     if (v < low || v > high) {
-      Assert.fail("Wanted " + msg + " to be in range [" + low + "," + high + "] but got " + v);
+      Assert.fail("Wanted " + msg + " to be in range [" + low + ',' + high + "] but got " + v);
     }
   }
 
@@ -112,6 +110,7 @@ public class OnlineSummarizerTest {
     return x;
   }
 
+  /*
   private static OnlineSummarizer gamma(int n, int seed) {
     OnlineSummarizer x = new OnlineSummarizer();
     Gamma g = new Gamma(0.01, 100, new MersenneTwister(seed));
@@ -120,6 +119,8 @@ public class OnlineSummarizerTest {
     }
     return x;
   }
+   */
+
 }
 
 

Modified: mahout/trunk/maven/src/main/resources/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/maven/src/main/resources/findbugs-exclude.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/maven/src/main/resources/findbugs-exclude.xml (original)
+++ mahout/trunk/maven/src/main/resources/findbugs-exclude.xml Mon Aug 30 18:41:46 2010
@@ -10,18 +10,21 @@
     <Bug pattern="SE_NO_SERIALVERSIONID"/>
   </Match>
   <Match>
+    <Bug pattern="EI_EXPOSE_REP"/>
+  </Match>
+  <Match>
     <Bug pattern="EI_EXPOSE_REP2"/>
- </Match>
- <Match>
+  </Match>
+  <Match>
     <Bug pattern="SIC_INNER_SHOULD_BE_STATIC_ANON"/>
- </Match>
- <Match>
+  </Match>
+  <Match>
     <Bug pattern="SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING"/>
   </Match>
- <Match>
+  <Match>
     <Bug pattern="SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE"/>
   </Match>
- <Match>
+  <Match>
     <Bug pattern="SE_BAD_FIELD"/>
   </Match>
   <Match>

Modified: mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml (original)
+++ mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml Mon Aug 30 18:41:46 2010
@@ -24,7 +24,7 @@
     <rule ref="rulesets/basic.xml/BooleanInstantiation"/>
     <rule ref="rulesets/basic.xml/CollapsibleIfStatements"/>
     <rule ref="rulesets/basic.xml/DoubleCheckedLocking"/>
-    <rule ref="rulesets/basic.xml/EmptyCatchBlock"/>
+    <!--<rule ref="rulesets/basic.xml/EmptyCatchBlock"/>-->
     <rule ref="rulesets/basic.xml/EmptyFinallyBlock"/>
     <rule ref="rulesets/basic.xml/EmptyIfStmt"/>
     <rule ref="rulesets/basic.xml/EmptyStatementNotInLoop"/>
@@ -52,7 +52,7 @@
     <!--<rule ref="rulesets/clone.xml/CloneThrowsCloneNotSupportedException"/>-->
     <!--<rule ref="rulesets/clone.xml/ProperCloneImplementation"/>-->
 
-    <rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>
+    <!--<rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>-->
     <rule ref="rulesets/codesize.xml/ExcessiveClassLength"/>
     <rule ref="rulesets/codesize.xml/ExcessiveMethodLength"/>
     <rule ref="rulesets/codesize.xml/ExcessiveParameterList"/>
@@ -78,7 +78,7 @@
     <!--<rule ref="rulesets/design.xml/AbstractClassWithoutAbstractMethod"/>-->
     <!--<rule ref="rulesets/design.xml/AccessorClassGeneration"/>-->
     <!--<rule ref="rulesets/design.xml/AssignmentToNonFinalStatic"/>-->
-    <rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>
+    <!--<rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>-->
     <!--<rule ref="rulesets/design.xml/AvoidInstanceofChecksInCatchClause"/>-->
     <rule ref="rulesets/design.xml/AvoidProtectedFieldInFinalClass"/>
     <!--<rule ref="rulesets/design.xml/AvoidReassigningParameters"/>-->
@@ -153,8 +153,8 @@
     <!--<rule ref="rulesets/naming.xml/MethodWithSameNameAsEnclosingClass"/>-->
     <!--<rule ref="rulesets/naming.xml/SuspiciousHashcodeMethodName"/>-->
     <!--<rule ref="rulesets/naming.xml/SuspiciousConstantFieldName"/>-->
-    <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>
-    <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>
+    <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>-->
+    <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>-->
     <!-- <rule ref="rulesets/naming.xml/AvoidNonConstructorMethodsWithClassName"/> -->
     <rule ref="rulesets/naming.xml/NoPackage"/>
     <rule ref="rulesets/naming.xml/PackageCase"/>
@@ -163,7 +163,7 @@
     <!--<rule ref="rulesets/optimizations.xml/MethodArgumentCouldBeFinal"/>-->
     <!--<rule ref="rulesets/optimizations.xml/AvoidInstantiatingObjectsInLoops"/>-->
     <!--<rule ref="rulesets/optimizations.xml/UseArrayListInsteadOfVector"/>-->
-    <rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>
+    <!--<rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>-->
     <rule ref="rulesets/optimizations.xml/UseStringBufferForStringAppends"/>
 
     <!--<rule ref="rulesets/strictexception.xml/AvoidCatchingThrowable"/>-->
@@ -173,7 +173,7 @@
     <!--<rule ref="rulesets/strictexception.xml/AvoidThrowingRawExceptionTypes"/>-->
     <!--<rule ref="rulesets/strictexception.xml/AvoidThrowingNullPointerException"/>-->
 
-    <rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>
+    <!--<rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>-->
     <rule ref="rulesets/strings.xml/StringInstantiation"/>
     <rule ref="rulesets/strings.xml/StringToString"/>
     <!-- <rule ref="rulesets/strings.xml/AvoidConcatenatingNonLiteralsInStringBuffer"/>-->

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java Mon Aug 30 18:41:46 2010
@@ -133,7 +133,7 @@ public final class CDbwDriver extends Ab
     conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName());
     CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
     // now print out the Results
-    System.out.println("CDbw = " + evaluator.CDbw());
+    System.out.println("CDbw = " + evaluator.getCDbw());
     System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
     System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
     System.out.println("Separation = " + evaluator.separation());

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java Mon Aug 30 18:41:46 2010
@@ -85,7 +85,7 @@ public class CDbwEvaluator {
     }
   }
 
-  public double CDbw() {
+  public double getCDbw() {
     return intraClusterDensity() * separation();
   }
 

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Mon Aug 30 18:41:46 2010
@@ -66,7 +66,7 @@ public final class SequenceFileDumper {
     withDescription("Print out help").withShortName("h").create();
     
     Group group = gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt)
-    .withOption(substringOpt).withOption(countOpt).withOption(helpOpt).create();
+      .withOption(substringOpt).withOption(countOpt).withOption(helpOpt).create();
     
     try {
       Parser parser = new Parser();
@@ -104,18 +104,18 @@ public final class SequenceFileDumper {
         .append(String.valueOf(value.getClass())).append('\n');
         writer.flush();
         long count = 0;
-        if (!countOnly) {
+        if (countOnly) {
           while (reader.next(key, value)) {
-            writer.append("Key: ").append(String.valueOf(key));
-            String str = value.toString();
-            writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
-            writer.write('\n');
-            writer.flush();
             count++;
           }
           writer.append("Count: ").append(String.valueOf(count)).append('\n');
         } else {
           while (reader.next(key, value)) {
+            writer.append("Key: ").append(String.valueOf(key));
+            String str = value.toString();
+            writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
+            writer.write('\n');
+            writer.flush();
             count++;
           }
           writer.append("Count: ").append(String.valueOf(count)).append('\n');

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java Mon Aug 30 18:41:46 2010
@@ -75,10 +75,10 @@ public final class CollocDriver extends 
               "ng",
               "(Optional) The max size of ngrams to create (2 = bigrams, 3 = trigrams, etc) default: 2",
               String.valueOf(DEFAULT_MAX_NGRAM_SIZE));
-    addOption("minSupport", "s", "(Optional) Minimum Support. Default Value: " + CollocReducer.DEFAULT_MIN_SUPPORT, String
-        .valueOf(CollocReducer.DEFAULT_MIN_SUPPORT));
-    addOption("minLLR", "ml", "(Optional)The minimum Log Likelihood Ratio(Float)  Default is " + LLRReducer.DEFAULT_MIN_LLR, String
-        .valueOf(LLRReducer.DEFAULT_MIN_LLR));
+    addOption("minSupport", "s", "(Optional) Minimum Support. Default Value: "
+        + CollocReducer.DEFAULT_MIN_SUPPORT, String.valueOf(CollocReducer.DEFAULT_MIN_SUPPORT));
+    addOption("minLLR", "ml", "(Optional)The minimum Log Likelihood Ratio(Float)  Default is "
+        + LLRReducer.DEFAULT_MIN_LLR, String.valueOf(LLRReducer.DEFAULT_MIN_LLR));
     addOption(DefaultOptionCreator.overwriteOption().create());
     addOption("analyzerName", "a", "The class name of the analyzer to use for preprocessing", null);
 
@@ -150,7 +150,8 @@ public final class CollocDriver extends 
     }
 
     // parse input and extract collocations
-    long ngramCount = generateCollocations(input, output, getConf(), emitUnigrams, maxNGramSize, reduceTasks, minSupport);
+    long ngramCount =
+      generateCollocations(input, output, getConf(), emitUnigrams, maxNGramSize, reduceTasks, minSupport);
 
     // tally collocations and perform LLR calculation
     computeNGramsPruneByLLR(output, getConf(), ngramCount, emitUnigrams, minLLRValue, reduceTasks);
@@ -173,9 +174,6 @@ public final class CollocDriver extends 
    *          minimum threshold to prune ngrams
    * @param reduceTasks
    *          number of reducers used
-   * @throws IOException
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
    */
   public static void generateAllGrams(Path input,
                                       Path output,
@@ -183,7 +181,8 @@ public final class CollocDriver extends 
                                       int maxNGramSize,
                                       int minSupport,
                                       float minLLRValue,
-                                      int reduceTasks) throws IOException, InterruptedException, ClassNotFoundException {
+                                      int reduceTasks)
+    throws IOException, InterruptedException, ClassNotFoundException {
     // parse input and extract collocations
     long ngramCount = generateCollocations(input, output, baseConf, true, maxNGramSize, reduceTasks, minSupport);
 

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java Mon Aug 30 18:41:46 2010
@@ -18,7 +18,6 @@
 package org.apache.mahout.utils.nlp.collocations.llr;
 
 import java.io.IOException;
-import java.util.Iterator;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DoubleWritable;
@@ -67,7 +66,7 @@ public class LLRReducer extends Reducer<
   @Override
   protected void reduce(Gram ngram, Iterable<Gram> values, Context context) throws IOException, InterruptedException {
 
-    int[] gramFreq = { -1, -1 };
+    int[] gramFreq = {-1, -1};
 
     if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
       DoubleWritable dd = new DoubleWritable(ngram.getFrequency());

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java Mon Aug 30 18:41:46 2010
@@ -18,7 +18,6 @@
 package org.apache.mahout.utils.vectors.common;
 
 import java.io.IOException;
-import java.util.Iterator;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.WritableComparable;

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Mon Aug 30 18:41:46 2010
@@ -70,7 +70,7 @@ import org.slf4j.LoggerFactory;
  */
 public class ClusterLabels {
 
-  class TermInfoClusterInOut implements Comparable<TermInfoClusterInOut> {
+  static class TermInfoClusterInOut implements Comparable<TermInfoClusterInOut> {
     private final String term;
 
     private final int inClusterDF;

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java Mon Aug 30 18:41:46 2010
@@ -110,9 +110,6 @@ public final class DictionaryVectorizer 
    *          available to you per node. Say, you have 2 cores and around 1GB extra memory to spare we
    *          recommend you use a split size of around 400-500MB so that two simultaneous reducers can create
    *          partial vectors without thrashing the system due to increased swapping
-   * @throws IOException
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
    */
   public static void createTermFrequencyVectors(Path input,
                                                 Path output,
@@ -122,7 +119,8 @@ public final class DictionaryVectorizer 
                                                 float minLLRValue,
                                                 int numReducers,
                                                 int chunkSizeInMegabytes,
-                                                boolean sequentialAccess) throws IOException, InterruptedException, ClassNotFoundException {
+                                                boolean sequentialAccess)
+    throws IOException, InterruptedException, ClassNotFoundException {
     if (chunkSizeInMegabytes < MIN_CHUNKSIZE) {
       chunkSizeInMegabytes = MIN_CHUNKSIZE;
     } else if (chunkSizeInMegabytes > MAX_CHUNKSIZE) { // 10GB
@@ -138,12 +136,12 @@ public final class DictionaryVectorizer 
     List<Path> dictionaryChunks;
     if (maxNGramSize == 1) {
       startWordCounting(input, dictionaryJobPath, minSupport);
-      dictionaryChunks = createDictionaryChunks(minSupport, dictionaryJobPath, output,
+      dictionaryChunks = createDictionaryChunks(dictionaryJobPath, output,
         chunkSizeInMegabytes, new LongWritable(), maxTermDimension);
     } else {
       CollocDriver.generateAllGrams(input, dictionaryJobPath, baseConf, maxNGramSize,
         minSupport, minLLRValue, numReducers);
-      dictionaryChunks = createDictionaryChunks(minSupport, new Path(
+      dictionaryChunks = createDictionaryChunks(new Path(
           new Path(output, DICTIONARY_JOB_FOLDER), CollocDriver.NGRAM_OUTPUT_DIRECTORY), output,
         chunkSizeInMegabytes, new DoubleWritable(), maxTermDimension);
     }
@@ -176,13 +174,11 @@ public final class DictionaryVectorizer 
    * Read the feature frequency List which is built at the end of the Word Count Job and assign ids to them.
    * This will use constant memory and will run at the speed of your disk read
    * 
-   * @param minSupport
    * @param wordCountPath
    * @param dictionaryPathBase
    * @throws IOException
    */
-  private static List<Path> createDictionaryChunks(int minSupport,
-                                                   Path wordCountPath,
+  private static List<Path> createDictionaryChunks(Path wordCountPath,
                                                    Path dictionaryPathBase,
                                                    int chunkSizeInMegabytes,
                                                    Writable value,

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java Mon Aug 30 18:41:46 2010
@@ -112,9 +112,6 @@ public final class TFIDFConverter {
    *          The number of reducers to spawn. This also affects the possible parallelism since each reducer
    *          will typically produce a single output file containing tf-idf vectors for a subset of the
    *          documents in the corpus.
-   * @throws IOException
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
    */
   public static void processTfIdf(Path input,
                                   Path output,
@@ -184,10 +181,6 @@ public final class TFIDFConverter {
   /**
    * Read the document frequency List which is built at the end of the DF Count Job. This will use constant
    * memory and will run at the speed of your disk read
-   * 
-   * @param featureCountPath
-   * @param dictionaryPathBase
-   * @throws IOException
    */
   private static Pair<Long[], List<Path>> createDictionaryChunks(Path featureCountPath,
                                                                  Path dictionaryPathBase,
@@ -205,7 +198,8 @@ public final class TFIDFConverter {
     int chunkIndex = 0;
     Path chunkPath = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkIndex);
     chunkPaths.add(chunkPath);
-    SequenceFile.Writer freqWriter = new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class, LongWritable.class);
+    SequenceFile.Writer freqWriter =
+      new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class, LongWritable.class);
 
     long currentChunkSize = 0;
     long featureCount = 0;
@@ -239,7 +233,7 @@ public final class TFIDFConverter {
     }
     featureCount++;
     freqWriter.close();
-    Long[] counts = { featureCount, vectorCount };
+    Long[] counts = {featureCount, vectorCount};
     return new Pair<Long[], List<Path>>(counts, chunkPaths);
   }
 
@@ -262,9 +256,6 @@ public final class TFIDFConverter {
    *          location of the chunk of features and the id's
    * @param output
    *          output directory were the partial vectors have to be created
-   * @throws IOException
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
    */
   private static void makePartialVectors(Path input,
                                          Long featureCount,
@@ -273,7 +264,8 @@ public final class TFIDFConverter {
                                          int maxDFPercent,
                                          Path dictionaryFilePath,
                                          Path output,
-                                         boolean sequentialAccess) throws IOException, InterruptedException, ClassNotFoundException {
+                                         boolean sequentialAccess)
+    throws IOException, InterruptedException, ClassNotFoundException {
 
     Configuration conf = new Configuration();
     // this conf parameter needs to be set enable serialisation of conf values
@@ -284,10 +276,11 @@ public final class TFIDFConverter {
     conf.setInt(MIN_DF, minDf);
     conf.setInt(MAX_DF_PERCENTAGE, maxDFPercent);
     conf.setBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, sequentialAccess);
-    DistributedCache.setCacheFiles(new URI[] { dictionaryFilePath.toUri() }, conf);
+    DistributedCache.setCacheFiles(new URI[] {dictionaryFilePath.toUri()}, conf);
 
     Job job = new Job(conf);
-    job.setJobName(": MakePartialVectors: input-folder: " + input + ", dictionary-file: " + dictionaryFilePath.toString());
+    job.setJobName(": MakePartialVectors: input-folder: " + input + ", dictionary-file: "
+        + dictionaryFilePath.toString());
     job.setJarByClass(TFIDFConverter.class);
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(VectorWritable.class);
@@ -308,10 +301,9 @@ public final class TFIDFConverter {
   /**
    * Count the document frequencies of features in parallel using Map/Reduce. The input documents have to be
    * in {@link SequenceFile} format
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
    */
-  private static void startDFCounting(Path input, Path output) throws IOException, InterruptedException, ClassNotFoundException {
+  private static void startDFCounting(Path input, Path output)
+    throws IOException, InterruptedException, ClassNotFoundException {
 
     Configuration conf = new Configuration();
     // this conf parameter needs to be set enable serialisation of conf values

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Mon Aug 30 18:41:46 2010
@@ -119,7 +119,7 @@ public class TestCDbwEvaluator extends M
     assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity());
     assertEquals("separation", 1.5, evaluator.separation());
     assertEquals("intra cluster density", 0.8944271909999157, evaluator.intraClusterDensity());
-    assertEquals("CDbw", 1.3416407864998736, evaluator.CDbw());
+    assertEquals("CDbw", 1.3416407864998736, evaluator.getCDbw());
   }
 
   public void testCDbw1() {
@@ -129,7 +129,7 @@ public class TestCDbwEvaluator extends M
     assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity());
     assertEquals("separation", 1.0, evaluator.separation());
     assertEquals("intra cluster density", 0.44721359549995787, evaluator.intraClusterDensity());
-    assertEquals("CDbw", 0.44721359549995787, evaluator.CDbw());
+    assertEquals("CDbw", 0.44721359549995787, evaluator.getCDbw());
   }
 
   public void testCDbw2() {
@@ -139,7 +139,7 @@ public class TestCDbwEvaluator extends M
     assertEquals("inter cluster density", 1.017921815355728, evaluator.interClusterDensity());
     assertEquals("separation", 0.24777966925931558, evaluator.separation());
     assertEquals("intra cluster density", 0.29814239699997197, evaluator.intraClusterDensity());
-    assertEquals("CDbw", 0.07387362452083261, evaluator.CDbw());
+    assertEquals("CDbw", 0.07387362452083261, evaluator.getCDbw());
   }
 
   public void testCanopy() throws Exception { // now run the Job

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Mon Aug 30 18:41:46 2010
@@ -28,7 +28,6 @@ import org.apache.lucene.store.RAMDirect
 import org.apache.lucene.util.Version;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.math.NamedVector;
-import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.utils.vectors.TFIDF;
 import org.apache.mahout.utils.vectors.TermInfo;