You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/13 18:56:01 UTC
svn commit: r909861 [4/4] - in /lucene/mahout/trunk/utils/src:
main/java/org/apache/mahout/clustering/lda/
main/java/org/apache/mahout/text/ main/java/org/apache/mahout/utils/
main/java/org/apache/mahout/utils/clustering/
main/java/org/apache/mahout/ut...
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java Sat Feb 13 17:55:56 2010
@@ -22,9 +22,8 @@
import java.util.HashMap;
-import junit.framework.TestCase;
+import junit.framework.Assert;
-import org.apache.mahout.utils.nlp.collocations.llr.Gram;
import org.junit.Test;
public class GramTest {
@@ -34,36 +33,36 @@
Gram one = new Gram("foo", 2, HEAD);
Gram two = new Gram("foo", 3, HEAD);
- TestCase.assertTrue(one.equals(two));
- TestCase.assertTrue(two.equals(one));
+ Assert.assertTrue(one.equals(two));
+ Assert.assertTrue(two.equals(one));
Gram three = new Gram("foo", 4, TAIL);
Gram four = new Gram("foo");
- TestCase.assertTrue(!three.equals(two));
- TestCase.assertTrue(four.equals(one));
- TestCase.assertTrue(one.equals(four));
+ Assert.assertTrue(!three.equals(two));
+ Assert.assertTrue(four.equals(one));
+ Assert.assertTrue(one.equals(four));
Gram five = new Gram("foobar", 4, TAIL);
- TestCase.assertTrue(!five.equals(four));
- TestCase.assertTrue(!five.equals(three));
- TestCase.assertTrue(!five.equals(two));
- TestCase.assertTrue(!five.equals(one));
+ Assert.assertTrue(!five.equals(four));
+ Assert.assertTrue(!five.equals(three));
+ Assert.assertTrue(!five.equals(two));
+ Assert.assertTrue(!five.equals(one));
}
@Test
public void testHashing() {
- Gram[] input =
+ Gram[] input =
{
- new Gram("foo", 2, HEAD),
- new Gram("foo", 3, HEAD),
- new Gram("foo", 4, TAIL),
- new Gram("foo", 5, TAIL),
- new Gram("bar", 6, HEAD),
- new Gram("bar", 7, TAIL),
- new Gram("bar", 8),
- new Gram("bar")
+ new Gram("foo", 2, HEAD),
+ new Gram("foo", 3, HEAD),
+ new Gram("foo", 4, TAIL),
+ new Gram("foo", 5, TAIL),
+ new Gram("bar", 6, HEAD),
+ new Gram("bar", 7, TAIL),
+ new Gram("bar", 8),
+ new Gram("bar")
};
HashMap<Gram,Gram> map = new HashMap<Gram,Gram>();
@@ -78,32 +77,32 @@
// frequencies of the items in the map.
int[] freq = {
- 5,
- 3,
- 9,
- 5,
- 15,
- 7,
- 8,
- 1
+ 5,
+ 3,
+ 9,
+ 5,
+ 15,
+ 7,
+ 8,
+ 1
};
// true if the index should be the item in the map
boolean[] memb = {
- true,
- false,
- true,
- false,
- true,
- true,
- false,
- false
+ true,
+ false,
+ true,
+ false,
+ true,
+ true,
+ false,
+ false
};
for (int i = 0; i < input.length; i++) {
System.err.println(i);
- TestCase.assertEquals(freq[i], input[i].getFrequency());
- TestCase.assertEquals(memb[i], input[i] == map.get(input[i]));
+ Assert.assertEquals(freq[i], input[i].getFrequency());
+ Assert.assertEquals(memb[i], input[i] == map.get(input[i]));
}
}
}
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java Sat Feb 13 17:55:56 2010
@@ -37,15 +37,15 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-/** Test the LLRReducer
+/** Test the LLRReducer
* FIXME: Add negative test cases.
*/
@SuppressWarnings("deprecation")
public class LLRReducerTest {
-
- private static final Logger log =
+
+ private static final Logger log =
LoggerFactory.getLogger(LLRReducerTest.class);
-
+
Reporter reporter;
LLCallback ll;
LLCallback cl;
@@ -54,11 +54,11 @@
OutputCollector<Text,DoubleWritable> collector = new OutputCollector<Text,DoubleWritable>() {
@Override
public void collect(Text key, DoubleWritable value) throws IOException {
- log.info(key.toString() + " " + value.toString());
+ LLRReducerTest.log.info(key.toString() + " " + value.toString());
}
};
-
-
+
+
@Before
public void setUp() {
reporter = EasyMock.createMock(Reporter.class);
@@ -66,7 +66,7 @@
cl = new LLCallback() {
@Override
public double logLikelihoodRatio(int k11, int k12, int k21, int k22) {
- log.info("k11:" + k11 + " k12:" + k12 + " k21:" + k21 + " k22:" + k22);
+ LLRReducerTest.log.info("k11:" + k11 + " k12:" + k12 + " k21:" + k21 + " k22:" + k22);
try {
return LogLikelihood.logLikelihoodRatio(k11, k12, k21, k22);
}
@@ -75,48 +75,48 @@
return -1;
}
}
-
+
};
}
-
+
@Test
public void testReduce() throws Exception {
LLRReducer reducer = new LLRReducer(ll);
-
- // test input, input[*][0] is the key,
+
+ // test input, input[*][0] is the key,
// input[*][1..n] are the values passed in via
// the iterator.
Gram[][] input = {
- {new Gram("the best", 1), new Gram("the", 2, HEAD), new Gram("best", 1, TAIL) },
- {new Gram("best of", 1), new Gram("best", 1, HEAD), new Gram("of", 2, TAIL) },
- {new Gram("of times", 2), new Gram("of", 2, HEAD), new Gram("times", 2, TAIL) },
- {new Gram("times the", 1), new Gram("times", 1, HEAD), new Gram("the", 1, TAIL) },
- {new Gram("the worst", 1), new Gram("the", 2, HEAD), new Gram("worst", 1, TAIL) },
- {new Gram("worst of", 1), new Gram("worst", 1, HEAD), new Gram("of", 2, TAIL) }
+ {new Gram("the best", 1), new Gram("the", 2, HEAD), new Gram("best", 1, TAIL) },
+ {new Gram("best of", 1), new Gram("best", 1, HEAD), new Gram("of", 2, TAIL) },
+ {new Gram("of times", 2), new Gram("of", 2, HEAD), new Gram("times", 2, TAIL) },
+ {new Gram("times the", 1), new Gram("times", 1, HEAD), new Gram("the", 1, TAIL) },
+ {new Gram("the worst", 1), new Gram("the", 2, HEAD), new Gram("worst", 1, TAIL) },
+ {new Gram("worst of", 1), new Gram("worst", 1, HEAD), new Gram("of", 2, TAIL) }
};
-
+
int[][] expectations = {
- // A+B, A+!B, !A+B, !A+!B
- {1, 1, 0, 5}, // the best
- {1, 0, 1, 5}, // best of
- {2, 0, 0, 5}, // of times
- {1, 0, 0, 6}, // times the
- {1, 1, 0, 5}, // the worst
- {1, 0, 1, 5} // worst of
+ // A+B, A+!B, !A+B, !A+!B
+ {1, 1, 0, 5}, // the best
+ {1, 0, 1, 5}, // best of
+ {2, 0, 0, 5}, // of times
+ {1, 0, 0, 6}, // times the
+ {1, 1, 0, 5}, // the worst
+ {1, 0, 1, 5} // worst of
};
-
+
for (int[] ee: expectations) {
EasyMock.expect(ll.logLikelihoodRatio(ee[0], ee[1], ee[2], ee[3])).andDelegateTo(cl);
}
-
+
EasyMock.replay(ll);
-
+
JobConf config = new JobConf(CollocDriver.class);
config.set(LLRReducer.NGRAM_TOTAL, "7");
reducer.configure(config);
-
+
for (Gram[] ii: input) {
List<Gram> vv = new LinkedList<Gram>();
for (int i = 1; i < ii.length; i++) {
@@ -124,7 +124,7 @@
}
reducer.reduce(ii[0], vv.iterator(), collector, reporter);
}
-
+
EasyMock.verify(ll);
}
}
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java Sat Feb 13 17:55:56 2010
@@ -42,41 +42,41 @@
*/
@SuppressWarnings("deprecation")
public class NGramCollectorTest {
-
+
OutputCollector<Gram,Gram> collector;
Reporter reporter;
-
+
@Before
@SuppressWarnings("unchecked")
public void setUp() {
collector = EasyMock.createMock(OutputCollector.class);
reporter = EasyMock.createMock(Reporter.class);
}
-
+
@Test
public void testCollectNgrams() throws Exception {
-
+
String input = "the best of times the worst of times";
-
- String[][] values =
+
+ String[][] values =
new String[][]{
- {"h_the", "the best"},
- {"t_best", "the best"},
- {"h_best", "best of"},
- {"t_of", "best of"},
- {"h_of", "of times"},
- {"t_times", "of times"},
- {"h_times", "times the"},
- {"t_the", "times the"},
- {"h_the", "the worst"},
- {"t_worst", "the worst"},
- {"h_worst", "worst of"},
- {"t_of", "worst of"},
- {"h_of", "of times"},
- {"t_times", "of times"}
+ {"h_the", "the best"},
+ {"t_best", "the best"},
+ {"h_best", "best of"},
+ {"t_of", "best of"},
+ {"h_of", "of times"},
+ {"t_times", "of times"},
+ {"h_times", "times the"},
+ {"t_the", "times the"},
+ {"h_the", "the worst"},
+ {"t_worst", "the worst"},
+ {"h_worst", "worst of"},
+ {"t_of", "worst of"},
+ {"h_of", "of times"},
+ {"t_times", "of times"}
};
// set up expectations for mocks. ngram max size = 2
-
+
// setup expectations
for (String[] v: values) {
Type p = v[0].startsWith("h") ? HEAD : TAIL;
@@ -84,24 +84,24 @@
Gram ngram = new Gram(v[1]);
collector.collect(subgram, ngram);
}
-
+
reporter.incrCounter(NGRAM_TOTAL, 7);
EasyMock.replay(reporter, collector);
Reader r = new StringReader(input);
-
+
JobConf conf = new JobConf();
conf.set(NGramCollector.MAX_SHINGLE_SIZE, "2");
conf.set(NGramCollector.ANALYZER_CLASS, TestAnalyzer.class.getName());
-
+
NGramCollector c = new NGramCollector();
c.configure(conf);
c.collectNgrams(r, collector, reporter);
-
+
EasyMock.verify(reporter, collector);
}
-
+
/** A lucene 2.9 standard analyzer with no stopwords. */
public static class TestAnalyzer extends Analyzer {
final Analyzer a;
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java Sat Feb 13 17:55:56 2010
@@ -17,40 +17,40 @@
package org.apache.mahout.utils.vectors;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Random;
+
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.function.UnaryFunction;
import org.apache.mahout.math.Vector;
-
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-import java.util.Random;
+import org.apache.mahout.math.function.UnaryFunction;
public class RandomVectorIterable implements Iterable<Vector>{
-
+
private int numItems = 100;
public enum VectorType {DENSE, SPARSE}
-
+
private VectorType type = VectorType.SPARSE;
-
+
public RandomVectorIterable() {
}
-
+
public RandomVectorIterable(int numItems) {
this.numItems = numItems;
}
-
+
public RandomVectorIterable(int numItems, VectorType type) {
this.numItems = numItems;
this.type = type;
}
-
+
@Override
public Iterator<Vector> iterator() {
return new VectIterator();
}
-
+
private class VectIterator implements Iterator<Vector>{
private int count = 0;
private final Random random = RandomUtils.getRandom();
@@ -58,7 +58,7 @@
public boolean hasNext() {
return count < numItems;
}
-
+
@Override
public Vector next() {
if (!hasNext()) {
@@ -74,7 +74,7 @@
count++;
return result;
}
-
+
@Override
public void remove() {
throw new UnsupportedOperationException();
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java Sat Feb 13 17:55:56 2010
@@ -17,6 +17,10 @@
package org.apache.mahout.utils.vectors;
+import java.io.File;
+
+import junit.framework.Assert;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -27,13 +31,11 @@
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.utils.vectors.io.SequenceFileVectorWriter;
-import java.io.File;
-
public class SequenceFileVectorIterableTest extends MahoutTestCase {
-
+
private File tmpLoc;
private File tmpFile;
-
+
@Override
public void setUp() throws Exception {
super.setUp();
@@ -44,14 +46,14 @@
tmpFile = File.createTempFile("sfvit", ".dat", tmpLoc);
tmpFile.deleteOnExit();
}
-
+
@Override
public void tearDown() throws Exception {
tmpFile.delete();
tmpLoc.delete();
super.tearDown();
}
-
+
public void testIterable() throws Exception {
Path path = new Path(tmpFile.getAbsolutePath());
Configuration conf = new Configuration();
@@ -61,7 +63,7 @@
RandomVectorIterable iter = new RandomVectorIterable(50);
writer.write(iter);
writer.close();
-
+
SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, path, conf);
SequenceFileVectorIterable sfvi = new SequenceFileVectorIterable(seqReader);
int count = 0;
@@ -70,6 +72,6 @@
count++;
}
seqReader.close();
- assertEquals(50, count);
+ Assert.assertEquals(50, count);
}
}
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java Sat Feb 13 17:55:56 2010
@@ -17,253 +17,255 @@
package org.apache.mahout.utils.vectors.arff;
+import java.text.DateFormat;
+import java.util.Iterator;
+import java.util.Map;
+
+import junit.framework.Assert;
+
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
-import java.text.DateFormat;
-import java.util.Iterator;
-import java.util.Map;
-
public class ARFFVectorIterableTest extends MahoutTestCase {
-
+
public void testValues() throws Exception {
StringBuilder builder = new StringBuilder();
builder.append("%comments").append('\n').append("@RELATION Mahout").append('\n')
- .append("@ATTRIBUTE foo numeric").append('\n')
- .append("@ATTRIBUTE bar numeric").append('\n')
- .append("@ATTRIBUTE timestamp DATE \"yyyy-MM-dd HH:mm:ss\"").append('\n')
- .append("@ATTRIBUTE junk string").append('\n')
- .append("@ATTRIBUTE theNominal {c,b,a}").append('\n')
- .append("@DATA").append('\n')
- .append("1,2, \"2009-01-01 5:55:55\", foo, c").append('\n')
- .append("2,3").append('\n')
- .append("{0 5,1 23}").append('\n');
+ .append("@ATTRIBUTE foo numeric").append('\n')
+ .append("@ATTRIBUTE bar numeric").append('\n')
+ .append("@ATTRIBUTE timestamp DATE \"yyyy-MM-dd HH:mm:ss\"").append('\n')
+ .append("@ATTRIBUTE junk string").append('\n')
+ .append("@ATTRIBUTE theNominal {c,b,a}").append('\n')
+ .append("@DATA").append('\n')
+ .append("1,2, \"2009-01-01 5:55:55\", foo, c").append('\n')
+ .append("2,3").append('\n')
+ .append("{0 5,1 23}").append('\n');
ARFFModel model = new MapBackedARFFModel();
ARFFVectorIterable iterable = new ARFFVectorIterable(builder.toString(), model);
- assertEquals("Mahout", iterable.getModel().getRelation());
+ Assert.assertEquals("Mahout", iterable.getModel().getRelation());
Map<String, Integer> bindings = iterable.getModel().getLabelBindings();
- assertNotNull(bindings);
- assertEquals(5, bindings.size());
+ Assert.assertNotNull(bindings);
+ Assert.assertEquals(5, bindings.size());
Iterator<Vector> iter = iterable.iterator();
- assertTrue(iter.hasNext());
+ Assert.assertTrue(iter.hasNext());
Vector next = iter.next();
- assertNotNull(next);
- assertTrue("Wrong instanceof", next instanceof DenseVector);
- assertEquals(1.0, next.get(0));
- assertEquals(2.0, next.get(1));
- assertTrue(iter.hasNext());
+ Assert.assertNotNull(next);
+ Assert.assertTrue("Wrong instanceof", next instanceof DenseVector);
+ Assert.assertEquals(1.0, next.get(0));
+ Assert.assertEquals(2.0, next.get(1));
+ Assert.assertTrue(iter.hasNext());
next = iter.next();
- assertNotNull(next);
- assertTrue("Wrong instanceof", next instanceof DenseVector);
- assertEquals(2.0, next.get(0));
- assertEquals(3.0, next.get(1));
-
- assertTrue(iter.hasNext());
+ Assert.assertNotNull(next);
+ Assert.assertTrue("Wrong instanceof", next instanceof DenseVector);
+ Assert.assertEquals(2.0, next.get(0));
+ Assert.assertEquals(3.0, next.get(1));
+
+ Assert.assertTrue(iter.hasNext());
next = iter.next();
- assertNotNull(next);
- assertTrue("Wrong instanceof", next instanceof RandomAccessSparseVector);
- assertEquals(5.0, next.get(0));
- assertEquals(23.0, next.get(1));
-
- assertFalse(iter.hasNext());
+ Assert.assertNotNull(next);
+ Assert.assertTrue("Wrong instanceof", next instanceof RandomAccessSparseVector);
+ Assert.assertEquals(5.0, next.get(0));
+ Assert.assertEquals(23.0, next.get(1));
+
+ Assert.assertFalse(iter.hasNext());
}
-
+
public void testDense() throws Exception {
ARFFModel model = new MapBackedARFFModel();
- ARFFVectorIterable iterable = new ARFFVectorIterable(SAMPLE_DENSE_ARFF, model);
+ ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.SAMPLE_DENSE_ARFF, model);
int count = 0;
for (Vector vector : iterable) {
- assertTrue("Vector is not dense", vector instanceof DenseVector);
+ Assert.assertTrue("Vector is not dense", vector instanceof DenseVector);
count++;
}
- assertEquals(10, count);
+ Assert.assertEquals(10, count);
}
-
+
public void testSparse() throws Exception {
ARFFModel model = new MapBackedARFFModel();
- ARFFVectorIterable iterable = new ARFFVectorIterable(SAMPLE_SPARSE_ARFF, model);
+ ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.SAMPLE_SPARSE_ARFF, model);
int count = 0;
for (Vector vector : iterable) {
- assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
+ Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
count++;
}
- assertEquals(10, count);
+ Assert.assertEquals(10, count);
}
-
+
public void testNonNumeric() throws Exception {
-
+
MapBackedARFFModel model = new MapBackedARFFModel();
- ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model);
+ ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF, model);
int count = 0;
for (Vector vector : iterable) {
- assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
+ Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
count++;
}
- assertEquals(10, count);
+ Assert.assertEquals(10, count);
Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
- assertNotNull(nominalMap);
- assertEquals(1, nominalMap.size());
+ Assert.assertNotNull(nominalMap);
+ Assert.assertEquals(1, nominalMap.size());
Map<String, Integer> noms = nominalMap.get("bar");
- assertNotNull("nominals for bar are null", noms);
- assertEquals(2, noms.size());
+ Assert.assertNotNull("nominals for bar are null", noms);
+ Assert.assertEquals(2, noms.size());
Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
- assertNotNull("Type map null", integerARFFTypeMap);
- assertEquals(5, integerARFFTypeMap.size());
+ Assert.assertNotNull("Type map null", integerARFFTypeMap);
+ Assert.assertEquals(5, integerARFFTypeMap.size());
Map<String, Long> words = model.getWords();
- assertNotNull("words null", words);
- assertEquals(10, words.size());
+ Assert.assertNotNull("words null", words);
+ Assert.assertEquals(10, words.size());
//System.out.println("Words: " + words);
Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
- assertNotNull("date format null", integerDateFormatMap);
- assertEquals(1, integerDateFormatMap.size());
-
+ Assert.assertNotNull("date format null", integerDateFormatMap);
+ Assert.assertEquals(1, integerDateFormatMap.size());
+
}
-
+
public void testMultipleNoms() throws Exception {
MapBackedARFFModel model = new MapBackedARFFModel();
- ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model);
+ ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF, model);
int count = 0;
for (Vector vector : iterable) {
- assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
+ Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
count++;
}
- assertEquals(10, count);
+ Assert.assertEquals(10, count);
Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
- assertNotNull(nominalMap);
- assertEquals(1, nominalMap.size());
+ Assert.assertNotNull(nominalMap);
+ Assert.assertEquals(1, nominalMap.size());
Map<String, Integer> noms = nominalMap.get("bar");
- assertNotNull("nominals for bar are null", noms);
- assertEquals(2, noms.size());
+ Assert.assertNotNull("nominals for bar are null", noms);
+ Assert.assertEquals(2, noms.size());
Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
- assertNotNull("Type map null", integerARFFTypeMap);
- assertEquals(5, integerARFFTypeMap.size());
+ Assert.assertNotNull("Type map null", integerARFFTypeMap);
+ Assert.assertEquals(5, integerARFFTypeMap.size());
Map<String, Long> words = model.getWords();
- assertNotNull("words null", words);
- assertEquals(10, words.size());
+ Assert.assertNotNull("words null", words);
+ Assert.assertEquals(10, words.size());
//System.out.println("Words: " + words);
Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
- assertNotNull("date format null", integerDateFormatMap);
- assertEquals(1, integerDateFormatMap.size());
+ Assert.assertNotNull("date format null", integerDateFormatMap);
+ Assert.assertEquals(1, integerDateFormatMap.size());
model = new MapBackedARFFModel(model.getWords(), model.getWordCount(),
- model.getNominalMap());
- iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF2, model);
+ model.getNominalMap());
+ iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF2, model);
count = 0;
for (Vector vector : iterable) {
- assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
+ Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
count++;
}
nominalMap = model.getNominalMap();
- assertNotNull(nominalMap);
- assertEquals(2, nominalMap.size());
+ Assert.assertNotNull(nominalMap);
+ Assert.assertEquals(2, nominalMap.size());
noms = nominalMap.get("test");
- assertNotNull("nominals for bar are null", noms);
- assertEquals(2, noms.size());
+ Assert.assertNotNull("nominals for bar are null", noms);
+ Assert.assertEquals(2, noms.size());
}
-
-
+
+
private static final String SAMPLE_DENSE_ARFF = " % Comments\n" +
- " % \n" +
- " % Comments go here" +
- " % \n" +
- " @RELATION Mahout\n" +
- '\n' +
- " @ATTRIBUTE foo NUMERIC\n" +
- " @ATTRIBUTE bar NUMERIC\n" +
- " @ATTRIBUTE hockey NUMERIC\n" +
- " @ATTRIBUTE football NUMERIC\n" +
- " \n" +
- '\n' +
- '\n' +
- " @DATA\n" +
- " 23.1,3.23,1.2,0.2\n" +
- " 2.9,3.0,1.2,0.2\n" +
- " 2.7,3.2,1.3,0.2\n" +
- " 2.6,3.1,1.23,0.2\n" +
- " 23.0,3.6,1.2,0.2\n" +
- " 23.2,3.9,1.7,0.2\n" +
- " 2.6,3.2,1.2,0.3\n" +
- " 23.0,3.2,1.23,0.2\n" +
- " 2.2,2.9,1.2,0.2\n" +
- " 2.9,3.1,1.23,0.1\n";
-
-
+ " % \n" +
+ " % Comments go here" +
+ " % \n" +
+ " @RELATION Mahout\n" +
+ '\n' +
+ " @ATTRIBUTE foo NUMERIC\n" +
+ " @ATTRIBUTE bar NUMERIC\n" +
+ " @ATTRIBUTE hockey NUMERIC\n" +
+ " @ATTRIBUTE football NUMERIC\n" +
+ " \n" +
+ '\n' +
+ '\n' +
+ " @DATA\n" +
+ " 23.1,3.23,1.2,0.2\n" +
+ " 2.9,3.0,1.2,0.2\n" +
+ " 2.7,3.2,1.3,0.2\n" +
+ " 2.6,3.1,1.23,0.2\n" +
+ " 23.0,3.6,1.2,0.2\n" +
+ " 23.2,3.9,1.7,0.2\n" +
+ " 2.6,3.2,1.2,0.3\n" +
+ " 23.0,3.2,1.23,0.2\n" +
+ " 2.2,2.9,1.2,0.2\n" +
+ " 2.9,3.1,1.23,0.1\n";
+
+
private static final String SAMPLE_SPARSE_ARFF = " % Comments\n" +
- " % \n" +
- " % Comments go here" +
- " % \n" +
- " @RELATION Mahout\n" +
- '\n' +
- " @ATTRIBUTE foo NUMERIC\n" +
- " @ATTRIBUTE bar NUMERIC\n" +
- " @ATTRIBUTE hockey NUMERIC\n" +
- " @ATTRIBUTE football NUMERIC\n" +
- " @ATTRIBUTE tennis NUMERIC\n" +
- " \n" +
- '\n' +
- '\n' +
- " @DATA\n" +
- " {1 23.1,2 3.23,3 1.2,4 0.2}\n" +
- " {0 2.9}\n" +
- " {0 2.7,2 3.2,3 1.3,4 0.2}\n" +
- " {1 2.6,2 3.1,3 1.23,4 0.2}\n" +
- " {1 23.0,2 3.6,3 1.2,4 0.2}\n" +
- " {0 23.2,1 3.9,3 1.7,4 0.2}\n" +
- " {0 2.6,1 3.2,2 1.2,4 0.3}\n" +
- " {1 23.0,2 3.2,3 1.23}\n" +
- " {1 2.2,2 2.94 0.2}\n" +
- " {1 2.9,2 3.1}\n";
-
+ " % \n" +
+ " % Comments go here" +
+ " % \n" +
+ " @RELATION Mahout\n" +
+ '\n' +
+ " @ATTRIBUTE foo NUMERIC\n" +
+ " @ATTRIBUTE bar NUMERIC\n" +
+ " @ATTRIBUTE hockey NUMERIC\n" +
+ " @ATTRIBUTE football NUMERIC\n" +
+ " @ATTRIBUTE tennis NUMERIC\n" +
+ " \n" +
+ '\n' +
+ '\n' +
+ " @DATA\n" +
+ " {1 23.1,2 3.23,3 1.2,4 0.2}\n" +
+ " {0 2.9}\n" +
+ " {0 2.7,2 3.2,3 1.3,4 0.2}\n" +
+ " {1 2.6,2 3.1,3 1.23,4 0.2}\n" +
+ " {1 23.0,2 3.6,3 1.2,4 0.2}\n" +
+ " {0 23.2,1 3.9,3 1.7,4 0.2}\n" +
+ " {0 2.6,1 3.2,2 1.2,4 0.3}\n" +
+ " {1 23.0,2 3.2,3 1.23}\n" +
+ " {1 2.2,2 2.94 0.2}\n" +
+ " {1 2.9,2 3.1}\n";
+
private static final String NON_NUMERIC_ARFF = " % Comments\n" +
- " % \n" +
- " % Comments go here" +
- " % \n" +
- " @RELATION Mahout\n" +
- '\n' +
- " @ATTRIBUTE junk NUMERIC\n" +
- " @ATTRIBUTE foo NUMERIC\n" +
- " @ATTRIBUTE bar {c,d}\n" +
- " @ATTRIBUTE hockey string\n" +
- " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" +
- " \n" +
- '\n' +
- '\n' +
- " @DATA\n" +
- " {2 c,3 gretzky,4 1973-10-23}\n" +
- " {1 2.9,2 d,3 orr,4 1973-11-23}\n" +
- " {2 c,3 bossy,4 1981-10-23}\n" +
- " {1 2.6,2 c,3 lefleur,4 1989-10-23}\n" +
- " {3 esposito,4 1973-04-23}\n" +
- " {1 23.2,2 d,3 chelios,4 1999-2-23}\n" +
- " {3 richard,4 1973-10-12}\n" +
- " {3 howe,4 1983-06-23}\n" +
- " {0 2.2,2 d,3 messier,4 2008-11-23}\n" +
- " {2 c,3 roy,4 1973-10-13}\n";
-
+ " % \n" +
+ " % Comments go here" +
+ " % \n" +
+ " @RELATION Mahout\n" +
+ '\n' +
+ " @ATTRIBUTE junk NUMERIC\n" +
+ " @ATTRIBUTE foo NUMERIC\n" +
+ " @ATTRIBUTE bar {c,d}\n" +
+ " @ATTRIBUTE hockey string\n" +
+ " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" +
+ " \n" +
+ '\n' +
+ '\n' +
+ " @DATA\n" +
+ " {2 c,3 gretzky,4 1973-10-23}\n" +
+ " {1 2.9,2 d,3 orr,4 1973-11-23}\n" +
+ " {2 c,3 bossy,4 1981-10-23}\n" +
+ " {1 2.6,2 c,3 lefleur,4 1989-10-23}\n" +
+ " {3 esposito,4 1973-04-23}\n" +
+ " {1 23.2,2 d,3 chelios,4 1999-2-23}\n" +
+ " {3 richard,4 1973-10-12}\n" +
+ " {3 howe,4 1983-06-23}\n" +
+ " {0 2.2,2 d,3 messier,4 2008-11-23}\n" +
+ " {2 c,3 roy,4 1973-10-13}\n";
+
private static final String NON_NUMERIC_ARFF2 = " % Comments\n" +
- " % \n" +
- " % Comments go here" +
- " % \n" +
- " @RELATION Mahout\n" +
- '\n' +
- " @ATTRIBUTE junk NUMERIC\n" +
- " @ATTRIBUTE foo NUMERIC\n" +
- " @ATTRIBUTE test {f,z}\n" +
- " @ATTRIBUTE hockey string\n" +
- " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" +
- " \n" +
- '\n' +
- '\n' +
- " @DATA\n" +
- " {2 f,3 gretzky,4 1973-10-23}\n" +
- " {1 2.9,2 z,3 orr,4 1973-11-23}\n" +
- " {2 f,3 bossy,4 1981-10-23}\n" +
- " {1 2.6,2 f,3 lefleur,4 1989-10-23}\n" +
- " {3 esposito,4 1973-04-23}\n" +
- " {1 23.2,2 z,3 chelios,4 1999-2-23}\n" +
- " {3 richard,4 1973-10-12}\n" +
- " {3 howe,4 1983-06-23}\n" +
- " {0 2.2,2 f,3 messier,4 2008-11-23}\n" +
- " {2 f,3 roy,4 1973-10-13}\n";
+ " % \n" +
+ " % Comments go here" +
+ " % \n" +
+ " @RELATION Mahout\n" +
+ '\n' +
+ " @ATTRIBUTE junk NUMERIC\n" +
+ " @ATTRIBUTE foo NUMERIC\n" +
+ " @ATTRIBUTE test {f,z}\n" +
+ " @ATTRIBUTE hockey string\n" +
+ " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" +
+ " \n" +
+ '\n' +
+ '\n' +
+ " @DATA\n" +
+ " {2 f,3 gretzky,4 1973-10-23}\n" +
+ " {1 2.9,2 z,3 orr,4 1973-11-23}\n" +
+ " {2 f,3 bossy,4 1981-10-23}\n" +
+ " {1 2.6,2 f,3 lefleur,4 1989-10-23}\n" +
+ " {3 esposito,4 1973-04-23}\n" +
+ " {1 23.2,2 z,3 chelios,4 1999-2-23}\n" +
+ " {3 richard,4 1973-10-12}\n" +
+ " {3 howe,4 1983-06-23}\n" +
+ " {0 2.2,2 f,3 messier,4 2008-11-23}\n" +
+ " {2 f,3 roy,4 1973-10-13}\n";
}
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java Sat Feb 13 17:55:56 2010
@@ -17,27 +17,29 @@
package org.apache.mahout.utils.vectors.io;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
+import java.io.File;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.List;
+
+import junit.framework.Assert;
+
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.utils.vectors.RandomVectorIterable;
-import java.io.File;
-import java.io.StringWriter;
-import java.util.List;
-import java.util.ArrayList;
-
public class VectorWriterTest extends MahoutTestCase {
-
+
private File tmpLoc;
private File tmpFile;
-
+
@Override
public void setUp() throws Exception {
super.setUp();
@@ -48,14 +50,14 @@
tmpFile = File.createTempFile("sfvwt", ".dat", tmpLoc);
tmpFile.deleteOnExit();
}
-
+
@Override
public void tearDown() throws Exception {
tmpFile.delete();
tmpLoc.delete();
super.tearDown();
}
-
+
public void testSFVW() throws Exception {
Path path = new Path(tmpFile.getAbsolutePath());
Configuration conf = new Configuration();
@@ -65,7 +67,7 @@
RandomVectorIterable iter = new RandomVectorIterable(50);
writer.write(iter);
writer.close();
-
+
SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, path, conf);
LongWritable key = new LongWritable();
VectorWritable value = new VectorWritable();
@@ -73,9 +75,9 @@
while (seqReader.next(key, value)){
count++;
}
- assertEquals(count + " does not equal: " + 50, 50, count);
+ Assert.assertEquals(count + " does not equal: " + 50, 50, count);
}
-
+
public void test() throws Exception {
StringWriter strWriter = new StringWriter();
VectorWriter writer = new JWriterVectorWriter(strWriter);
@@ -85,8 +87,8 @@
writer.write(vectors);
writer.close();
StringBuffer buffer = strWriter.getBuffer();
- assertNotNull(buffer);
- assertTrue(buffer.length() > 0);
-
+ Assert.assertNotNull(buffer);
+ Assert.assertTrue(buffer.length() > 0);
+
}
}
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Sat Feb 13 17:55:56 2010
@@ -17,63 +17,65 @@
package org.apache.mahout.utils.vectors.lucene;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
+import junit.framework.Assert;
+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.utils.vectors.Weight;
+import org.apache.mahout.math.Vector;
import org.apache.mahout.utils.vectors.TFIDF;
import org.apache.mahout.utils.vectors.TermInfo;
-import org.apache.mahout.math.Vector;
+import org.apache.mahout.utils.vectors.Weight;
public class LuceneIterableTest extends MahoutTestCase {
private RAMDirectory directory;
-
+
private static final String [] DOCS = {
- "The quick red fox jumped over the lazy brown dogs.",
- "Mary had a little lamb whose fleece was white as snow.",
- "Moby Dick is a story of a whale and a man obsessed.",
- "The robber wore a black fleece jacket and a baseball cap.",
- "The English Springer Spaniel is the best of all dogs."
- };
-
-
+ "The quick red fox jumped over the lazy brown dogs.",
+ "Mary had a little lamb whose fleece was white as snow.",
+ "Moby Dick is a story of a whale and a man obsessed.",
+ "The robber wore a black fleece jacket and a baseball cap.",
+ "The English Springer Spaniel is the best of all dogs."
+ };
+
+
@Override
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
- for (int i = 0; i < DOCS.length; i++){
+ for (int i = 0; i < LuceneIterableTest.DOCS.length; i++){
Document doc = new Document();
Field id = new Field("id", "doc_" + i, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
doc.add(id);
//Store both position and offset information
- Field text = new Field("content", DOCS[i], Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
+ Field text = new Field("content", LuceneIterableTest.DOCS[i], Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
doc.add(text);
writer.addDocument(doc);
}
writer.close();
}
-
+
public void testIterable() throws Exception {
IndexReader reader = IndexReader.open(directory, true);
Weight weight = new TFIDF();
TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
LuceneIterable iterable = new LuceneIterable(reader, "id", "content", mapper);
-
+
//TODO: do something more meaningful here
for (Vector vector : iterable) {
- assertNotNull(vector);
- assertTrue("vector is not an instanceof " + RandomAccessSparseVector.class, vector instanceof RandomAccessSparseVector);
- assertTrue("vector Size: " + vector.size() + " is not greater than: " + 0, vector.size() > 0);
+ Assert.assertNotNull(vector);
+ Assert.assertTrue("vector is not an instanceof " + RandomAccessSparseVector.class, vector instanceof RandomAccessSparseVector);
+ Assert.assertTrue("vector Size: " + vector.size() + " is not greater than: " + 0, vector.size() > 0);
}
}
-
-
+
+
}
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java Sat Feb 13 17:55:56 2010
@@ -52,46 +52,48 @@
public static final String DELIM = " .,?;:!\t\n\r";
public static final String ERRORSET = "`1234567890"
- + "-=~@#$%^&*()_+[]{}'\"/<>|\\";
+ + "-=~@#$%^&*()_+[]{}'\"/<>|\\";
private static final Random random = RandomUtils.getRandom();
private FileSystem fs;
private static char getRandomDelimiter() {
- return DELIM.charAt(random.nextInt(DELIM.length()));
+ return DictionaryVectorizerTest.DELIM.charAt(DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.DELIM.length()));
}
public static String getRandomDocument() {
- int length = (AVG_DOCUMENT_LENGTH >> 1)
- + random.nextInt(AVG_DOCUMENT_LENGTH);
- StringBuilder sb = new StringBuilder(length * AVG_SENTENCE_LENGTH
- * AVG_WORD_LENGTH);
+ int length = (DictionaryVectorizerTest.AVG_DOCUMENT_LENGTH >> 1)
+ + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_DOCUMENT_LENGTH);
+ StringBuilder sb = new StringBuilder(length * DictionaryVectorizerTest.AVG_SENTENCE_LENGTH
+ * DictionaryVectorizerTest.AVG_WORD_LENGTH);
for (int i = 0; i < length; i++) {
- sb.append(getRandomSentence());
+ sb.append(DictionaryVectorizerTest.getRandomSentence());
}
return sb.toString();
}
public static String getRandomSentence() {
- int length = (AVG_SENTENCE_LENGTH >> 1)
- + random.nextInt(AVG_SENTENCE_LENGTH);
- StringBuilder sb = new StringBuilder(length * AVG_WORD_LENGTH);
+ int length = (DictionaryVectorizerTest.AVG_SENTENCE_LENGTH >> 1)
+ + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_SENTENCE_LENGTH);
+ StringBuilder sb = new StringBuilder(length * DictionaryVectorizerTest.AVG_WORD_LENGTH);
for (int i = 0; i < length; i++) {
- sb.append(getRandomString()).append(' ');
+ sb.append(DictionaryVectorizerTest.getRandomString()).append(' ');
}
- sb.append(getRandomDelimiter());
+ sb.append(DictionaryVectorizerTest.getRandomDelimiter());
return sb.toString();
}
public static String getRandomString() {
- int length = (AVG_WORD_LENGTH >> 1) + random.nextInt(AVG_WORD_LENGTH);
+ int length = (DictionaryVectorizerTest.AVG_WORD_LENGTH >> 1) + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_WORD_LENGTH);
StringBuilder sb = new StringBuilder(length);
for (int i = 0; i < length; i++) {
- sb.append(CHARSET.charAt(random.nextInt(CHARSET.length())));
+ sb.append(DictionaryVectorizerTest.CHARSET.charAt(DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.CHARSET.length())));
+ }
+ if (DictionaryVectorizerTest.random.nextInt(10) == 0) {
+ sb.append(DictionaryVectorizerTest.ERRORSET.charAt(DictionaryVectorizerTest.random
+ .nextInt(DictionaryVectorizerTest.ERRORSET.length())));
}
- if (random.nextInt(10) == 0) sb.append(ERRORSET.charAt(random
- .nextInt(ERRORSET.length())));
return sb.toString();
}
@@ -101,7 +103,7 @@
if (f.isDirectory()) {
String[] contents = f.list();
for (String content : contents) {
- rmr(f.toString() + File.separator + content);
+ DictionaryVectorizerTest.rmr(f.toString() + File.separator + content);
}
}
f.delete();
@@ -111,31 +113,31 @@
@Override
public void setUp() throws Exception {
super.setUp();
- rmr("output");
- rmr("testdata");
+ DictionaryVectorizerTest.rmr("output");
+ DictionaryVectorizerTest.rmr("testdata");
Configuration conf = new Configuration();
fs = FileSystem.get(conf);
}
public void testCreateTermFrequencyVectors() throws IOException,
- InterruptedException,
- ClassNotFoundException,
- URISyntaxException {
+ InterruptedException,
+ ClassNotFoundException,
+ URISyntaxException {
Configuration conf = new Configuration();
String pathString = "testdata/documents/docs.file";
Path path = new Path(pathString);
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
- Text.class, Text.class);
+ Text.class, Text.class);
- for (int i = 0; i < NUM_DOCS; i++) {
+ for (int i = 0; i < DictionaryVectorizerTest.NUM_DOCS; i++) {
writer.append(new Text("Document::ID::" + i), new Text(
- getRandomDocument()));
+ DictionaryVectorizerTest.getRandomDocument()));
}
writer.close();
Class<? extends Analyzer> analyzer = new StandardAnalyzer(
- Version.LUCENE_CURRENT).getClass();
+ Version.LUCENE_CURRENT).getClass();
DocumentProcessor.tokenizeDocuments(pathString, analyzer,
- "output/tokenized-documents");
+ "output/tokenized-documents");
DictionaryVectorizer.createTermFrequencyVectors("output/tokenized-documents",
"output/wordcount", 2, 1, 0.0f, 1, 100, false);
TFIDFConverter.processTfIdf("output/wordcount/vectors", "output/tfidf/", 100, 1, 99, 1.0f, false);