You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/08/30 20:41:48 UTC
svn commit: r990892 [2/2] - in /mahout/trunk: buildtools/src/main/resources/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/
core/src/main/java/org/apache/mahout/clusterin...
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java Mon Aug 30 18:41:46 2010
@@ -107,8 +107,7 @@ public class WikipediaMapper extends Map
throw new IllegalStateException(ex);
}
log.info("Configure: Input Categories size: {} All: {} Exact Match: {}",
- new Object[] { inputCategories.size(), all,
- exactMatchOnly });
+ new Object[] {inputCategories.size(), all, exactMatchOnly});
}
private static String getDocument(String xml) {
Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java Mon Aug 30 18:41:46 2010
@@ -33,7 +33,7 @@ import org.apache.mahout.ga.watchmaker.c
public class CDReducerTest extends MahoutTestCase {
- private static final int nbevals = 100;
+ private static final int NUM_EVALS = 100;
private List<CDFitness> evaluations;
@@ -49,7 +49,7 @@ public class CDReducerTest extends Mahou
int fp = 0;
int tn = 0;
int fn = 0;
- for (int index = 0; index < nbevals; index++) {
+ for (int index = 0; index < NUM_EVALS; index++) {
CDFitness fitness = new CDFitness(rng.nextInt(100), rng.nextInt(100), rng.nextInt(100), rng.nextInt(100));
tp += fitness.getTp();
fp += fitness.getFp();
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java Mon Aug 30 18:41:46 2010
@@ -30,8 +30,10 @@ import java.util.Date;
/**
* Quick medium quality uniform pseudo-random number generator.
*
- * Produces uniformly distributed <tt>int</tt>'s and <tt>long</tt>'s in the closed intervals <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> and <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt>, respectively,
- * as well as <tt>float</tt>'s and <tt>double</tt>'s in the open unit intervals <tt>(0.0f,1.0f)</tt> and <tt>(0.0,1.0)</tt>, respectively.
+ * Produces uniformly distributed <tt>int</tt>'s and <tt>long</tt>'s in the closed intervals
+ * <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> and <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt>, respectively,
+ * as well as <tt>float</tt>'s and <tt>double</tt>'s in the open unit intervals <tt>(0.0f,1.0f)</tt>
+ * and <tt>(0.0,1.0)</tt>, respectively.
* <p>
* The seed can be any integer satisfying <tt>0 < 4*seed+1 < 2<sup>32</sup></tt>.
* In other words, there must hold <tt>seed >= 0 && seed < 1073741823</tt>.
@@ -83,7 +85,8 @@ public class DRand extends RandomEngine
}
/**
- * Returns a 32 bit uniformly distributed random number in the closed interval <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt>
+ * Returns a 32 bit uniformly distributed random number in the closed interval
+ * <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt>
* (including <tt>Integer.MIN_VALUE</tt> and <tt>Integer.MAX_VALUE</tt>).
*/
@Override
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java Mon Aug 30 18:41:46 2010
@@ -16,9 +16,12 @@ import org.apache.mahout.math.list.IntAr
/**
* Conveniently computes a stable subsequence of elements from a given input sequence;
* Picks (samples) exactly one random element from successive blocks of <tt>weight</tt> input elements each.
- * For example, if weight==2 (a block is 2 elements), and the input is 5*2=10 elements long, then picks 5 random elements from the 10 elements such that
- * one element is randomly picked from the first block, one element from the second block, ..., one element from the last block.
- * weight == 1.0 --> all elements are picked (sampled). weight == 10.0 --> Picks one random element from successive blocks of 10 elements each. Etc.
+ * For example, if weight==2 (a block is 2 elements), and the input is 5*2=10 elements long,
+ * then picks 5 random elements from the 10 elements such that
+ * one element is randomly picked from the first block, one element from the second block, ...,
+ * one element from the last block.
+ * weight == 1.0 --> all elements are picked (sampled). weight == 10.0 --> Picks one random
+ * element from successive blocks of 10 elements each. Etc.
* The subsequence is guaranteed to be <i>stable</i>, i.e. elements never change position relative to each other.
*
*/
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java Mon Aug 30 18:41:46 2010
@@ -46,25 +46,25 @@ class KnownDoubleQuantileEstimator exten
protected RandomSamplingAssistant samplingAssistant;
protected final double samplingRate; // see method sampleNextElement()
- protected final long N; // see method sampleNextElement()
+ protected final long n; // see method sampleNextElement()
/**
* Constructs an approximate quantile finder with b buffers, each having k elements.
*
* @param b the number of buffers
* @param k the number of elements per buffer
- * @param N the total number of elements over which quantiles are to be computed.
+ * @param n the total number of elements over which quantiles are to be computed.
* @param samplingRate 1.0 --> all elements are consumed. 10.0 --> Consumes one random element from successive blocks
* of 10 elements each. Etc.
* @param generator a uniform random number generator.
*/
- KnownDoubleQuantileEstimator(int b, int k, long N, double samplingRate, RandomEngine generator) {
+ KnownDoubleQuantileEstimator(int b, int k, long n, double samplingRate, RandomEngine generator) {
this.samplingRate = samplingRate;
- this.N = N;
+ this.n = n;
this.samplingAssistant = this.samplingRate <= 1.0
? null
- : new RandomSamplingAssistant(Arithmetic.floor(N / samplingRate), N, generator);
+ : new RandomSamplingAssistant(Arithmetic.floor(n / samplingRate), n, generator);
setUp(b, k);
this.clear();
@@ -123,7 +123,7 @@ class KnownDoubleQuantileEstimator exten
RandomSamplingAssistant assist = this.samplingAssistant;
if (assist != null) {
this.samplingAssistant =
- new RandomSamplingAssistant(Arithmetic.floor(N / samplingRate), N, assist.getRandomGenerator());
+ new RandomSamplingAssistant(Arithmetic.floor(n / samplingRate), n, assist.getRandomGenerator());
}
}
@@ -194,17 +194,19 @@ class KnownDoubleQuantileEstimator exten
*/
@Override
public DoubleArrayList quantileElements(DoubleArrayList phis) {
- /*
- * The KNOWN quantile finder reads off quantiles from FULL buffers only.
- * Since there might be a partially full buffer, this method first satisfies this constraint by temporarily filling a few +infinity, -infinity elements to make up a full block.
- * This is in full conformance with the explicit approximation guarantees.
- *
- * For those of you working on online apps:
- * The approximation guarantees are given for computing quantiles AFTER N elements have been filled, not for intermediate displays.
- * If you have one thread filling and another thread displaying concurrently, you will note that in the very beginning the infinities will dominate the display.
- * This could confuse users, because, of course, they don't expect any infinities, even if they "disappear" after a short while.
- * To prevent panic exclude phi's close to zero or one in the early phases of processing.
- */
+ // The KNOWN quantile finder reads off quantiles from FULL buffers only.
+ // Since there might be a partially full buffer, this method first satisfies this constraint by
+ // temporarily filling a few +infinity, -infinity elements to make up a full block.
+ // This is in full conformance with the explicit approximation guarantees.
+ //
+ // For those of you working on online apps:
+ // The approximation guarantees are given for computing quantiles AFTER N elements have
+ // been filled, not for intermediate displays.
+ // If you have one thread filling and another thread displaying concurrently, you will
+ // note that in the very beginning the infinities will dominate the display.
+ // This could confuse users, because, of course, they don't expect any infinities,
+ // even if they "disappear" after a short while.
+ // To prevent panic exclude phi's close to zero or one in the early phases of processing.
DoubleBuffer partial = this.bufferSet._getPartialBuffer();
int missingValues = 0;
if (partial != null) { // any auxiliary infinities needed?
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java Mon Aug 30 18:41:46 2010
@@ -90,7 +90,8 @@ class UnknownDoubleQuantileEstimator ext
sortAscendingByLevel(fullBuffers);
- // if there is only one buffer at the lowest level, then increase its level so that there are at least two at the lowest level.
+ // if there is only one buffer at the lowest level, then increase its level
+ // so that there are at least two at the lowest level.
int minLevel = fullBuffers[1].level();
if (fullBuffers[0].level() < minLevel) {
fullBuffers[0].level(minLevel);
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java Mon Aug 30 18:41:46 2010
@@ -43,7 +43,7 @@ public abstract class AbstractMatrix1D e
* @param absRank the absolute rank of the element.
* @return the position.
*/
- protected int _offset(int absRank) {
+ protected int offset(int absRank) {
return absRank;
}
@@ -124,7 +124,7 @@ public abstract class AbstractMatrix1D e
* @param rank the rank of the element.
*/
protected int index(int rank) {
- return _offset(_rank(rank));
+ return offset(_rank(rank));
}
/**
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java Mon Aug 30 18:41:46 2010
@@ -77,7 +77,7 @@ class SelectedDenseDoubleMatrix1D extend
* @return the position.
*/
@Override
- protected int _offset(int absRank) {
+ protected int offset(int absRank) {
return offsets[absRank];
}
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java Mon Aug 30 18:41:46 2010
@@ -80,7 +80,7 @@ class SelectedSparseDoubleMatrix1D exten
* @return the position.
*/
@Override
- protected int _offset(int absRank) {
+ protected int offset(int absRank) {
return offsets[absRank];
}
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java Mon Aug 30 18:41:46 2010
@@ -31,11 +31,11 @@ public class LUDecomposition implements
* Constructs and returns a new LU Decomposition object; The decomposed matrices can be retrieved via instance methods
* of the returned decomposition object.
*
- * @param A Rectangular matrix
+ * @param a Rectangular matrix
*/
- public LUDecomposition(DoubleMatrix2D A) {
+ public LUDecomposition(DoubleMatrix2D a) {
quick = new LUDecompositionQuick(0); // zero tolerance for compatibility with Jama
- quick.decompose(A.copy());
+ quick.decompose(a.copy());
}
/**
@@ -52,9 +52,11 @@ public class LUDecomposition implements
*
* @return (double) piv
*/
+ /*
private double[] getDoublePivot() {
return quick.getDoublePivot();
}
+ */
/**
* Returns the lower triangular factor, <tt>L</tt>.
@@ -95,17 +97,17 @@ public class LUDecomposition implements
/**
* Solves <tt>A*X = B</tt>.
*
- * @param B A matrix with as many rows as <tt>A</tt> and any number of columns.
+ * @param b A matrix with as many rows as <tt>A</tt> and any number of columns.
* @return <tt>X</tt> so that <tt>L*U*X = B(piv,:)</tt>.
* @throws IllegalArgumentException if <tt>B.rows() != A.rows()</tt>.
* @throws IllegalArgumentException if A is singular, that is, if <tt>!this.isNonsingular()</tt>.
* @throws IllegalArgumentException if <tt>A.rows() < A.columns()</tt>.
*/
- public DoubleMatrix2D solve(DoubleMatrix2D B) {
- DoubleMatrix2D X = B.copy();
- quick.solve(X);
- return X;
+ public DoubleMatrix2D solve(DoubleMatrix2D b) {
+ DoubleMatrix2D x = b.copy();
+ quick.solve(x);
+ return x;
}
/**
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java Mon Aug 30 18:41:46 2010
@@ -39,7 +39,8 @@ import org.apache.mahout.math.list.Doubl
* http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.105.1580
*/
public class OnlineSummarizer {
- boolean sorted = true;
+
+ private boolean sorted = true;
// the first several samples are kept so we can boot-strap our estimates cleanly
private DoubleArrayList starter = new DoubleArrayList(100);
@@ -67,11 +68,11 @@ public class OnlineSummarizer {
starter.add(sample);
} else if (n == 100) {
starter.add(sample);
- q[0] = min();
- q[1] = quartile(1);
- q[2] = quartile(2);
- q[3] = quartile(3);
- q[4] = max();
+ q[0] = getMin();
+ q[1] = getQuartile(1);
+ q[2] = getQuartile(2);
+ q[3] = getQuartile(3);
+ q[4] = getMax();
starter = null;
} else {
q[0] = Math.min(sample, q[0]);
@@ -92,19 +93,19 @@ public class OnlineSummarizer {
}
}
- public int count() {
+ public int getCount() {
return n;
}
- public double mean() {
+ public double getMean() {
return mean;
}
- public double sd() {
+ public double getSD() {
return Math.sqrt(variance);
}
- public double min() {
+ public double getMin() {
sort();
if (n == 0) {
throw new IllegalArgumentException("Must have at least one sample to estimate minimum value");
@@ -119,7 +120,7 @@ public class OnlineSummarizer {
}
}
- public double max() {
+ public double getMax() {
sort();
if (n == 0) {
throw new IllegalArgumentException("Must have at least one sample to estimate maximum value");
@@ -127,11 +128,11 @@ public class OnlineSummarizer {
return n <= 100 ? starter.get(99) : q[4];
}
- public double quartile(int i) {
+ public double getQuartile(int i) {
sort();
switch (i) {
case 0:
- return min();
+ return getMin();
case 1:
case 2:
case 3:
@@ -146,13 +147,13 @@ public class OnlineSummarizer {
return starter.get(k) * (1 - u) + starter.get(k + 1) * u;
}
case 4:
- return max();
+ return getMax();
default:
throw new IllegalArgumentException("Quartile number must be in the range [0..4] not " + i);
}
}
- public double median() {
- return quartile(2);
+ public double getMedian() {
+ return getQuartile(2);
}
}
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java Mon Aug 30 18:41:46 2010
@@ -65,10 +65,8 @@ public abstract class MahoutTestCase ext
private File getTestTempFileOrDir(String name, boolean dir) throws IOException {
File f = new File(getTestTempDir(), name);
f.deleteOnExit();
- if (dir) {
- if (!f.mkdirs()) {
- throw new IOException("Could not make directory " + f);
- }
+ if (dir && !f.mkdirs()) {
+ throw new IOException("Could not make directory " + f);
}
return f;
}
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java Mon Aug 30 18:41:46 2010
@@ -35,6 +35,7 @@ import java.util.Date;
* uses a legacy method to initialize the PRNG state. </ul>
*/
public class MersenneTwisterTest {
+
/**
* Convert an unsigned int stored in a long to a double in a fashion compatible with the C
* reference implementation
@@ -42,12 +43,12 @@ public class MersenneTwisterTest {
* @param y The long value to convert
* @return A double in the range [0..1)
*/
- private double toDouble(long y) {
+ private static double toDouble(long y) {
return (double) (y & 0xffffffffL) * 2.3283064365386963e-10;
}
@Test
- public void test1000_1() {
+ public void test10001() {
MersenneTwister r = new MersenneTwister();
r.setReferenceSeed(4357);
@@ -62,7 +63,7 @@ public class MersenneTwisterTest {
r.setReferenceSeed(4357);
i = 0;
for (Double x : ref1) {
- Assert.assertEquals("t-ref-double-" + i, x, toDouble(r.nextInt()), 1e-7);
+ Assert.assertEquals("t-ref-double-" + i, x, toDouble(r.nextInt()), 1.0e-7);
i++;
}
}
@@ -72,7 +73,7 @@ public class MersenneTwisterTest {
MersenneTwister r = new MersenneTwister(42);
int i = 0;
for (double x : reference3) {
- Assert.assertEquals("t-regression-"+i, x, r.nextDouble(), 1e-7);
+ Assert.assertEquals("t-regression-"+i, x, r.nextDouble(), 1.0e-7);
i++;
}
@@ -108,7 +109,7 @@ public class MersenneTwisterTest {
// output of first 1000 values of nextInt() as determined by a slight modification of
// the reference C implementation
- private long[] reference1 = new long[]{
+ private final long[] reference1 = {
-1427748157, 1585203162, -1181843167, -1341066457, -1831172428,
-812701500, 1164297043, -696771727, 589972756, -182733429,
767115311, -201891849, 1322433849, -937881972, -994918828,
@@ -313,7 +314,7 @@ public class MersenneTwisterTest {
// the first 1000 values output by the reference implementation
// data from http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/991029/mt19937-2.out
- private double[] ref1 = new double[]{
+ private final double[] ref1 = {
0.66757648, 0.36908387, 0.72483069, 0.68775863, 0.57364694,
0.81077819, 0.27108403, 0.83777019, 0.13736374, 0.95745406,
0.17860795, 0.95299339, 0.30790312, 0.78163234, 0.76835241,
@@ -516,7 +517,7 @@ public class MersenneTwisterTest {
0.44807063, 0.06424586, 0.75766097, 0.40567560, 0.23996701
};
- private double[] reference3 = new double[]{
+ private final double[] reference3 = {
0.09575212, 0.03326592, 0.10697744, 0.37992924, 0.13844205,
0.07550350, 0.89109388, 0.50151205, 0.79222822, 0.41823034,
0.84440729, 0.66568635, 0.87830555, 0.17024311, 0.43956848,
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java Mon Aug 30 18:41:46 2010
@@ -17,8 +17,6 @@
package org.apache.mahout.math.stats;
-import org.apache.mahout.math.jet.random.Gamma;
-import org.apache.mahout.math.jet.random.engine.MersenneTwister;
import org.junit.Assert;
import org.junit.Test;
@@ -28,13 +26,13 @@ public class OnlineSummarizerTest {
@Test
public void testCount() {
OnlineSummarizer x = new OnlineSummarizer();
- Assert.assertEquals(0, x.count());
+ Assert.assertEquals(0, x.getCount());
x.add(1);
- Assert.assertEquals(1, x.count());
+ Assert.assertEquals(1, x.getCount());
for (int i = 2; i < 110; i++) {
x.add(i);
- Assert.assertEquals(i, x.count());
+ Assert.assertEquals(i, x.getCount());
}
}
@@ -80,17 +78,17 @@ public class OnlineSummarizerTest {
private static void check(OnlineSummarizer x, double... values) {
for (int i = 0; i < 5; i++) {
- checkRange("quartile " + i, x.quartile(i), values[2 * i], values[2 * i + 1]);
+ checkRange("quartile " + i, x.getQuartile(i), values[2 * i], values[2 * i + 1]);
}
- Assert.assertEquals(x.quartile(2), x.median(), 0);
+ Assert.assertEquals(x.getQuartile(2), x.getMedian(), 0);
- checkRange("mean", x.mean(), values[10], values[11]);
- checkRange("sd", x.sd(), values[12], values[13]);
+ checkRange("mean", x.getMean(), values[10], values[11]);
+ checkRange("sd", x.getSD(), values[12], values[13]);
}
private static void checkRange(String msg, double v, double low, double high) {
if (v < low || v > high) {
- Assert.fail("Wanted " + msg + " to be in range [" + low + "," + high + "] but got " + v);
+ Assert.fail("Wanted " + msg + " to be in range [" + low + ',' + high + "] but got " + v);
}
}
@@ -112,6 +110,7 @@ public class OnlineSummarizerTest {
return x;
}
+ /*
private static OnlineSummarizer gamma(int n, int seed) {
OnlineSummarizer x = new OnlineSummarizer();
Gamma g = new Gamma(0.01, 100, new MersenneTwister(seed));
@@ -120,6 +119,8 @@ public class OnlineSummarizerTest {
}
return x;
}
+ */
+
}
Modified: mahout/trunk/maven/src/main/resources/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/maven/src/main/resources/findbugs-exclude.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/maven/src/main/resources/findbugs-exclude.xml (original)
+++ mahout/trunk/maven/src/main/resources/findbugs-exclude.xml Mon Aug 30 18:41:46 2010
@@ -10,18 +10,21 @@
<Bug pattern="SE_NO_SERIALVERSIONID"/>
</Match>
<Match>
+ <Bug pattern="EI_EXPOSE_REP"/>
+ </Match>
+ <Match>
<Bug pattern="EI_EXPOSE_REP2"/>
- </Match>
- <Match>
+ </Match>
+ <Match>
<Bug pattern="SIC_INNER_SHOULD_BE_STATIC_ANON"/>
- </Match>
- <Match>
+ </Match>
+ <Match>
<Bug pattern="SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING"/>
</Match>
- <Match>
+ <Match>
<Bug pattern="SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE"/>
</Match>
- <Match>
+ <Match>
<Bug pattern="SE_BAD_FIELD"/>
</Match>
<Match>
Modified: mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml (original)
+++ mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml Mon Aug 30 18:41:46 2010
@@ -24,7 +24,7 @@
<rule ref="rulesets/basic.xml/BooleanInstantiation"/>
<rule ref="rulesets/basic.xml/CollapsibleIfStatements"/>
<rule ref="rulesets/basic.xml/DoubleCheckedLocking"/>
- <rule ref="rulesets/basic.xml/EmptyCatchBlock"/>
+ <!--<rule ref="rulesets/basic.xml/EmptyCatchBlock"/>-->
<rule ref="rulesets/basic.xml/EmptyFinallyBlock"/>
<rule ref="rulesets/basic.xml/EmptyIfStmt"/>
<rule ref="rulesets/basic.xml/EmptyStatementNotInLoop"/>
@@ -52,7 +52,7 @@
<!--<rule ref="rulesets/clone.xml/CloneThrowsCloneNotSupportedException"/>-->
<!--<rule ref="rulesets/clone.xml/ProperCloneImplementation"/>-->
- <rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>
+ <!--<rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>-->
<rule ref="rulesets/codesize.xml/ExcessiveClassLength"/>
<rule ref="rulesets/codesize.xml/ExcessiveMethodLength"/>
<rule ref="rulesets/codesize.xml/ExcessiveParameterList"/>
@@ -78,7 +78,7 @@
<!--<rule ref="rulesets/design.xml/AbstractClassWithoutAbstractMethod"/>-->
<!--<rule ref="rulesets/design.xml/AccessorClassGeneration"/>-->
<!--<rule ref="rulesets/design.xml/AssignmentToNonFinalStatic"/>-->
- <rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>
+ <!--<rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>-->
<!--<rule ref="rulesets/design.xml/AvoidInstanceofChecksInCatchClause"/>-->
<rule ref="rulesets/design.xml/AvoidProtectedFieldInFinalClass"/>
<!--<rule ref="rulesets/design.xml/AvoidReassigningParameters"/>-->
@@ -153,8 +153,8 @@
<!--<rule ref="rulesets/naming.xml/MethodWithSameNameAsEnclosingClass"/>-->
<!--<rule ref="rulesets/naming.xml/SuspiciousHashcodeMethodName"/>-->
<!--<rule ref="rulesets/naming.xml/SuspiciousConstantFieldName"/>-->
- <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>
- <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>
+ <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>-->
+ <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>-->
<!-- <rule ref="rulesets/naming.xml/AvoidNonConstructorMethodsWithClassName"/> -->
<rule ref="rulesets/naming.xml/NoPackage"/>
<rule ref="rulesets/naming.xml/PackageCase"/>
@@ -163,7 +163,7 @@
<!--<rule ref="rulesets/optimizations.xml/MethodArgumentCouldBeFinal"/>-->
<!--<rule ref="rulesets/optimizations.xml/AvoidInstantiatingObjectsInLoops"/>-->
<!--<rule ref="rulesets/optimizations.xml/UseArrayListInsteadOfVector"/>-->
- <rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>
+ <!--<rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>-->
<rule ref="rulesets/optimizations.xml/UseStringBufferForStringAppends"/>
<!--<rule ref="rulesets/strictexception.xml/AvoidCatchingThrowable"/>-->
@@ -173,7 +173,7 @@
<!--<rule ref="rulesets/strictexception.xml/AvoidThrowingRawExceptionTypes"/>-->
<!--<rule ref="rulesets/strictexception.xml/AvoidThrowingNullPointerException"/>-->
- <rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>
+ <!--<rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>-->
<rule ref="rulesets/strings.xml/StringInstantiation"/>
<rule ref="rulesets/strings.xml/StringToString"/>
<!-- <rule ref="rulesets/strings.xml/AvoidConcatenatingNonLiteralsInStringBuffer"/>-->
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java Mon Aug 30 18:41:46 2010
@@ -133,7 +133,7 @@ public final class CDbwDriver extends Ab
conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName());
CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
// now print out the Results
- System.out.println("CDbw = " + evaluator.CDbw());
+ System.out.println("CDbw = " + evaluator.getCDbw());
System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
System.out.println("Separation = " + evaluator.separation());
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java Mon Aug 30 18:41:46 2010
@@ -85,7 +85,7 @@ public class CDbwEvaluator {
}
}
- public double CDbw() {
+ public double getCDbw() {
return intraClusterDensity() * separation();
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Mon Aug 30 18:41:46 2010
@@ -66,7 +66,7 @@ public final class SequenceFileDumper {
withDescription("Print out help").withShortName("h").create();
Group group = gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt)
- .withOption(substringOpt).withOption(countOpt).withOption(helpOpt).create();
+ .withOption(substringOpt).withOption(countOpt).withOption(helpOpt).create();
try {
Parser parser = new Parser();
@@ -104,18 +104,18 @@ public final class SequenceFileDumper {
.append(String.valueOf(value.getClass())).append('\n');
writer.flush();
long count = 0;
- if (!countOnly) {
+ if (countOnly) {
while (reader.next(key, value)) {
- writer.append("Key: ").append(String.valueOf(key));
- String str = value.toString();
- writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
- writer.write('\n');
- writer.flush();
count++;
}
writer.append("Count: ").append(String.valueOf(count)).append('\n');
} else {
while (reader.next(key, value)) {
+ writer.append("Key: ").append(String.valueOf(key));
+ String str = value.toString();
+ writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
+ writer.write('\n');
+ writer.flush();
count++;
}
writer.append("Count: ").append(String.valueOf(count)).append('\n');
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java Mon Aug 30 18:41:46 2010
@@ -75,10 +75,10 @@ public final class CollocDriver extends
"ng",
"(Optional) The max size of ngrams to create (2 = bigrams, 3 = trigrams, etc) default: 2",
String.valueOf(DEFAULT_MAX_NGRAM_SIZE));
- addOption("minSupport", "s", "(Optional) Minimum Support. Default Value: " + CollocReducer.DEFAULT_MIN_SUPPORT, String
- .valueOf(CollocReducer.DEFAULT_MIN_SUPPORT));
- addOption("minLLR", "ml", "(Optional)The minimum Log Likelihood Ratio(Float) Default is " + LLRReducer.DEFAULT_MIN_LLR, String
- .valueOf(LLRReducer.DEFAULT_MIN_LLR));
+ addOption("minSupport", "s", "(Optional) Minimum Support. Default Value: "
+ + CollocReducer.DEFAULT_MIN_SUPPORT, String.valueOf(CollocReducer.DEFAULT_MIN_SUPPORT));
+ addOption("minLLR", "ml", "(Optional)The minimum Log Likelihood Ratio(Float) Default is "
+ + LLRReducer.DEFAULT_MIN_LLR, String.valueOf(LLRReducer.DEFAULT_MIN_LLR));
addOption(DefaultOptionCreator.overwriteOption().create());
addOption("analyzerName", "a", "The class name of the analyzer to use for preprocessing", null);
@@ -150,7 +150,8 @@ public final class CollocDriver extends
}
// parse input and extract collocations
- long ngramCount = generateCollocations(input, output, getConf(), emitUnigrams, maxNGramSize, reduceTasks, minSupport);
+ long ngramCount =
+ generateCollocations(input, output, getConf(), emitUnigrams, maxNGramSize, reduceTasks, minSupport);
// tally collocations and perform LLR calculation
computeNGramsPruneByLLR(output, getConf(), ngramCount, emitUnigrams, minLLRValue, reduceTasks);
@@ -173,9 +174,6 @@ public final class CollocDriver extends
* minimum threshold to prune ngrams
* @param reduceTasks
* number of reducers used
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
public static void generateAllGrams(Path input,
Path output,
@@ -183,7 +181,8 @@ public final class CollocDriver extends
int maxNGramSize,
int minSupport,
float minLLRValue,
- int reduceTasks) throws IOException, InterruptedException, ClassNotFoundException {
+ int reduceTasks)
+ throws IOException, InterruptedException, ClassNotFoundException {
// parse input and extract collocations
long ngramCount = generateCollocations(input, output, baseConf, true, maxNGramSize, reduceTasks, minSupport);
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java Mon Aug 30 18:41:46 2010
@@ -18,7 +18,6 @@
package org.apache.mahout.utils.nlp.collocations.llr;
import java.io.IOException;
-import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DoubleWritable;
@@ -67,7 +66,7 @@ public class LLRReducer extends Reducer<
@Override
protected void reduce(Gram ngram, Iterable<Gram> values, Context context) throws IOException, InterruptedException {
- int[] gramFreq = { -1, -1 };
+ int[] gramFreq = {-1, -1};
if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
DoubleWritable dd = new DoubleWritable(ngram.getFrequency());
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java Mon Aug 30 18:41:46 2010
@@ -18,7 +18,6 @@
package org.apache.mahout.utils.vectors.common;
import java.io.IOException;
-import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Mon Aug 30 18:41:46 2010
@@ -70,7 +70,7 @@ import org.slf4j.LoggerFactory;
*/
public class ClusterLabels {
- class TermInfoClusterInOut implements Comparable<TermInfoClusterInOut> {
+ static class TermInfoClusterInOut implements Comparable<TermInfoClusterInOut> {
private final String term;
private final int inClusterDF;
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java Mon Aug 30 18:41:46 2010
@@ -110,9 +110,6 @@ public final class DictionaryVectorizer
* available to you per node. Say, you have 2 cores and around 1GB extra memory to spare we
* recommend you use a split size of around 400-500MB so that two simultaneous reducers can create
* partial vectors without thrashing the system due to increased swapping
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
public static void createTermFrequencyVectors(Path input,
Path output,
@@ -122,7 +119,8 @@ public final class DictionaryVectorizer
float minLLRValue,
int numReducers,
int chunkSizeInMegabytes,
- boolean sequentialAccess) throws IOException, InterruptedException, ClassNotFoundException {
+ boolean sequentialAccess)
+ throws IOException, InterruptedException, ClassNotFoundException {
if (chunkSizeInMegabytes < MIN_CHUNKSIZE) {
chunkSizeInMegabytes = MIN_CHUNKSIZE;
} else if (chunkSizeInMegabytes > MAX_CHUNKSIZE) { // 10GB
@@ -138,12 +136,12 @@ public final class DictionaryVectorizer
List<Path> dictionaryChunks;
if (maxNGramSize == 1) {
startWordCounting(input, dictionaryJobPath, minSupport);
- dictionaryChunks = createDictionaryChunks(minSupport, dictionaryJobPath, output,
+ dictionaryChunks = createDictionaryChunks(dictionaryJobPath, output,
chunkSizeInMegabytes, new LongWritable(), maxTermDimension);
} else {
CollocDriver.generateAllGrams(input, dictionaryJobPath, baseConf, maxNGramSize,
minSupport, minLLRValue, numReducers);
- dictionaryChunks = createDictionaryChunks(minSupport, new Path(
+ dictionaryChunks = createDictionaryChunks(new Path(
new Path(output, DICTIONARY_JOB_FOLDER), CollocDriver.NGRAM_OUTPUT_DIRECTORY), output,
chunkSizeInMegabytes, new DoubleWritable(), maxTermDimension);
}
@@ -176,13 +174,11 @@ public final class DictionaryVectorizer
* Read the feature frequency List which is built at the end of the Word Count Job and assign ids to them.
* This will use constant memory and will run at the speed of your disk read
*
- * @param minSupport
* @param wordCountPath
* @param dictionaryPathBase
* @throws IOException
*/
- private static List<Path> createDictionaryChunks(int minSupport,
- Path wordCountPath,
+ private static List<Path> createDictionaryChunks(Path wordCountPath,
Path dictionaryPathBase,
int chunkSizeInMegabytes,
Writable value,
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java Mon Aug 30 18:41:46 2010
@@ -112,9 +112,6 @@ public final class TFIDFConverter {
* The number of reducers to spawn. This also affects the possible parallelism since each reducer
* will typically produce a single output file containing tf-idf vectors for a subset of the
* documents in the corpus.
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
public static void processTfIdf(Path input,
Path output,
@@ -184,10 +181,6 @@ public final class TFIDFConverter {
/**
* Read the document frequency List which is built at the end of the DF Count Job. This will use constant
* memory and will run at the speed of your disk read
- *
- * @param featureCountPath
- * @param dictionaryPathBase
- * @throws IOException
*/
private static Pair<Long[], List<Path>> createDictionaryChunks(Path featureCountPath,
Path dictionaryPathBase,
@@ -205,7 +198,8 @@ public final class TFIDFConverter {
int chunkIndex = 0;
Path chunkPath = new Path(dictionaryPathBase, FREQUENCY_FILE + chunkIndex);
chunkPaths.add(chunkPath);
- SequenceFile.Writer freqWriter = new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class, LongWritable.class);
+ SequenceFile.Writer freqWriter =
+ new SequenceFile.Writer(fs, conf, chunkPath, IntWritable.class, LongWritable.class);
long currentChunkSize = 0;
long featureCount = 0;
@@ -239,7 +233,7 @@ public final class TFIDFConverter {
}
featureCount++;
freqWriter.close();
- Long[] counts = { featureCount, vectorCount };
+ Long[] counts = {featureCount, vectorCount};
return new Pair<Long[], List<Path>>(counts, chunkPaths);
}
@@ -262,9 +256,6 @@ public final class TFIDFConverter {
* location of the chunk of features and the id's
* @param output
* output directory were the partial vectors have to be created
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
private static void makePartialVectors(Path input,
Long featureCount,
@@ -273,7 +264,8 @@ public final class TFIDFConverter {
int maxDFPercent,
Path dictionaryFilePath,
Path output,
- boolean sequentialAccess) throws IOException, InterruptedException, ClassNotFoundException {
+ boolean sequentialAccess)
+ throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
// this conf parameter needs to be set enable serialisation of conf values
@@ -284,10 +276,11 @@ public final class TFIDFConverter {
conf.setInt(MIN_DF, minDf);
conf.setInt(MAX_DF_PERCENTAGE, maxDFPercent);
conf.setBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, sequentialAccess);
- DistributedCache.setCacheFiles(new URI[] { dictionaryFilePath.toUri() }, conf);
+ DistributedCache.setCacheFiles(new URI[] {dictionaryFilePath.toUri()}, conf);
Job job = new Job(conf);
- job.setJobName(": MakePartialVectors: input-folder: " + input + ", dictionary-file: " + dictionaryFilePath.toString());
+ job.setJobName(": MakePartialVectors: input-folder: " + input + ", dictionary-file: "
+ + dictionaryFilePath.toString());
job.setJarByClass(TFIDFConverter.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(VectorWritable.class);
@@ -308,10 +301,9 @@ public final class TFIDFConverter {
/**
* Count the document frequencies of features in parallel using Map/Reduce. The input documents have to be
* in {@link SequenceFile} format
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
- private static void startDFCounting(Path input, Path output) throws IOException, InterruptedException, ClassNotFoundException {
+ private static void startDFCounting(Path input, Path output)
+ throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
// this conf parameter needs to be set enable serialisation of conf values
Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Mon Aug 30 18:41:46 2010
@@ -119,7 +119,7 @@ public class TestCDbwEvaluator extends M
assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity());
assertEquals("separation", 1.5, evaluator.separation());
assertEquals("intra cluster density", 0.8944271909999157, evaluator.intraClusterDensity());
- assertEquals("CDbw", 1.3416407864998736, evaluator.CDbw());
+ assertEquals("CDbw", 1.3416407864998736, evaluator.getCDbw());
}
public void testCDbw1() {
@@ -129,7 +129,7 @@ public class TestCDbwEvaluator extends M
assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity());
assertEquals("separation", 1.0, evaluator.separation());
assertEquals("intra cluster density", 0.44721359549995787, evaluator.intraClusterDensity());
- assertEquals("CDbw", 0.44721359549995787, evaluator.CDbw());
+ assertEquals("CDbw", 0.44721359549995787, evaluator.getCDbw());
}
public void testCDbw2() {
@@ -139,7 +139,7 @@ public class TestCDbwEvaluator extends M
assertEquals("inter cluster density", 1.017921815355728, evaluator.interClusterDensity());
assertEquals("separation", 0.24777966925931558, evaluator.separation());
assertEquals("intra cluster density", 0.29814239699997197, evaluator.intraClusterDensity());
- assertEquals("CDbw", 0.07387362452083261, evaluator.CDbw());
+ assertEquals("CDbw", 0.07387362452083261, evaluator.getCDbw());
}
public void testCanopy() throws Exception { // now run the Job
Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Mon Aug 30 18:41:46 2010
@@ -28,7 +28,6 @@ import org.apache.lucene.store.RAMDirect
import org.apache.lucene.util.Version;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.NamedVector;
-import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.utils.vectors.TFIDF;
import org.apache.mahout.utils.vectors.TermInfo;