You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/08/24 22:16:40 UTC
svn commit: r807361 [2/2] - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/
core/src/main/java/org/apache/mahout/classifier/bayes/
core/src/main/java/org/apache/mahout/classifier/cbayes/
core/src/main/java/org/a...
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java Mon Aug 24 20:16:37 2009
@@ -55,25 +55,22 @@
CDRule mutated = mutation.mutate(new CDRule(rule), rng);
// check the ranges
- double min, max;
- double value, newval;
- int nbcats;
for (int condInd = 0; condInd < mutated.getNbConditions(); condInd++) {
int attrInd = CDRule.attributeIndex(condInd);
- value = rule.getV(condInd);
- newval = mutated.getV(condInd);
+ double value = rule.getV(condInd);
+ double newval = mutated.getV(condInd);
modified = modified || (value != newval);
if (dataset.isNumerical(attrInd)) {
- min = dataset.getMin(attrInd);
- max = dataset.getMax(attrInd);
+ double min = dataset.getMin(attrInd);
+ double max = dataset.getMax(attrInd);
assertInRange(newval, min, max);
assertTrue(Math.abs(newval - value) <= (max - min) * range);
} else {
- nbcats = dataset.getNbValues(attrInd);
+ int nbcats = dataset.getNbValues(attrInd);
assertInRange(newval, 0, nbcats);
}
@@ -84,7 +81,7 @@
assertTrue(modified);
}
- private void assertInRange(double value, double min, double max) {
+ private static void assertInRange(double value, double min, double max) {
TestCase.assertTrue("value < min", value >= min);
TestCase.assertTrue("value > max", value <= max);
}
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java Mon Aug 24 20:16:37 2009
@@ -61,7 +61,7 @@
}
}
- private void assertInRange(double value, double min, double max) {
+ private static void assertInRange(double value, double min, double max) {
Assert.assertTrue("value < min", value >= min);
Assert.assertTrue("value > max", value <= max);
}
@@ -77,14 +77,14 @@
*
*/
public void testWCondition() {
- int n = 100; // repeat the test n times
// the dataline has all its attributes set to 0d
DataLine dl = EasyMock.createMock(DataLine.class);
- EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(0d).atLeastOnce();
+ EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(0.0).atLeastOnce();
EasyMock.replay(dl);
// all the conditions are : attribut < 0
+ int n = 100; // repeat the test n times
for (int nloop = 0; nloop < n; nloop++) {
double thr = rng.nextDouble();
@@ -116,19 +116,19 @@
*
*/
public void testOConditionNumerical() {
- int n = 100; // repeat the test n times
// the dataline has all its attributes set to 1d
DataLine dl = EasyMock.createMock(DataLine.class);
- EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(1d).atLeastOnce();
+ EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(1.0d).atLeastOnce();
EasyMock.replay(dl);
+ int n = 100; // repeat the test n times
for (int nloop = 0; nloop < n; nloop++) {
mock.numericalDataset();
- CDRule rule = new CDRule(0.);
+ CDRule rule = new CDRule(0.0);
for (int condInd = 0; condInd < rule.getNbConditions(); condInd++) {
- rule.setW(condInd, 1.); // all weights are 1 (active)
+ rule.setW(condInd, 1.0); // all weights are 1 (active)
rule.setO(condInd, rng.nextBoolean());
rule.setV(condInd, 0);
}
@@ -152,21 +152,21 @@
*
*/
public void testOConditionCategorical() {
- int n = 100; // repeat the test n times
// the dataline has all its attributes set to 1d
DataLine dl = EasyMock.createMock(DataLine.class);
- EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(1d).atLeastOnce();
+ EasyMock.expect(dl.getAttribut(EasyMock.anyInt())).andReturn(1.0d).atLeastOnce();
EasyMock.replay(dl);
Random rng = new MersenneTwisterRNG();
+ int n = 100; // repeat the test n times
for (int nloop = 0; nloop < n; nloop++) {
mock.categoricalDataset();
// all weights are 1 (active)
- CDRule rule = new CDRule(0.);
+ CDRule rule = new CDRule(0.0);
for (int condInd = 0; condInd < rule.getNbConditions(); condInd++) {
- rule.setW(condInd, 1.);
+ rule.setW(condInd, 1.0);
rule.setO(condInd, rng.nextBoolean());
rule.setV(condInd, rng.nextInt(2)); // two categories
}
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java Mon Aug 24 20:16:37 2009
@@ -42,39 +42,45 @@
private long current;
- private long size;
+ private final long size;
- public MockReader(long size) {
+ MockReader(long size) {
assert size > 0 : "size == 0";
this.size = size;
}
+ @Override
public void close() throws IOException {
// TODO Auto-generated method stub
}
+ @Override
public LongWritable createKey() {
// TODO Auto-generated method stub
return null;
}
+ @Override
public Text createValue() {
// TODO Auto-generated method stub
return null;
}
+ @Override
public long getPos() throws IOException {
// TODO Auto-generated method stub
return 0;
}
+ @Override
public float getProgress() throws IOException {
// TODO Auto-generated method stub
return 0;
}
+ @Override
public boolean next(LongWritable key, Text value) throws IOException {
if (current == size) {
return false;
@@ -89,13 +95,11 @@
int n = 20;
for (int nloop = 0; nloop < n; nloop++) {
- long datasetSize = 100;
MersenneTwisterRNG rng = new MersenneTwisterRNG();
byte[] seed = rng.getSeed();
double threshold = rng.nextDouble();
JobConf conf = new JobConf();
- RndLineRecordReader rndReader;
Set<Long> dataset = new HashSet<Long>();
LongWritable key = new LongWritable();
Text value = new Text();
@@ -104,7 +108,8 @@
// read the training set
split.storeJobParameters(conf);
- rndReader = new RndLineRecordReader(new MockReader(datasetSize), conf);
+ long datasetSize = 100;
+ RndLineRecordReader rndReader = new RndLineRecordReader(new MockReader(datasetSize), conf);
while (rndReader.next(key, value)) {
assertTrue("duplicate line index", dataset.add(key.get()));
}
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java Mon Aug 24 20:16:37 2009
@@ -45,9 +45,8 @@
private Descriptors randomDescriptors(int nbattributes, double numRate, double catRate) {
char[] descriptors = new char[nbattributes];
- double rnd;
for (int index = 0; index < nbattributes; index++) {
- rnd = rng.nextDouble();
+ double rnd = rng.nextDouble();
if (rnd < numRate) {
// numerical attribute
descriptors[index] = 'N';
@@ -93,12 +92,10 @@
private void randomDataset(FileSystem fs, Path input, Descriptors descriptors,
Object[][] descriptions) throws IOException {
int nbfiles = rng.nextInt(20) + 1;
- FSDataOutputStream out;
- BufferedWriter writer;
for (int floop = 0; floop < nbfiles; floop++) {
- out = fs.create(new Path(input, "file." + floop));
- writer = new BufferedWriter(new OutputStreamWriter(out));
+ FSDataOutputStream out = fs.create(new Path(input, "file." + floop));
+ BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
int nblines = rng.nextInt(200) + 1;
for (int line = 0; line < nblines; line++) {
@@ -173,13 +170,13 @@
// Start the tool
List<String> result = new ArrayList<String>();
- int rindex=0;
CDInfosTool.gatherInfos(descriptors, inpath, result);
// check the results
Collection<String> target = new ArrayList<String>();
assertEquals(nbNonIgnored(descriptors), result.size());
+ int rindex = 0;
for (int index = 0; index < nbattrs; index++) {
if (descriptors.isIgnored(index)) {
continue;
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java Mon Aug 24 20:16:37 2009
@@ -34,10 +34,10 @@
ToolMapper mapper = new ToolMapper();
// no attribute is ignored
- String dataline = "A1, A2, A3, A4, A5, A6";
char[] descriptors = { 'N', 'N', 'C', 'C', 'N', 'N' };
mapper.configure(descriptors);
+ String dataline = "A1, A2, A3, A4, A5, A6";
value.set(dataline);
mapper.map(key, value, output, null);
@@ -57,10 +57,10 @@
ToolMapper mapper = new ToolMapper();
// no attribute is ignored
- String dataline = "A1, I, A3, I, I, A6";
char[] descriptors = { 'N', 'I', 'C', 'I', 'I', 'N' };
mapper.configure(descriptors);
+ String dataline = "A1, I, A3, I, I, A6";
value.set(dataline);
mapper.map(key, value, output, null);
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/MockDataSet.java Mon Aug 24 20:16:37 2009
@@ -29,11 +29,11 @@
*/
public class MockDataSet {
- private Random rng;
+ private final Random rng;
- private int maxnba;
+ private final int maxnba;
- private DataSet dataset;
+ private final DataSet dataset;
/**
*
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRule.java Mon Aug 24 20:16:37 2009
@@ -38,6 +38,7 @@
this.rng = rng;
}
+ @Override
public int classify(DataLine dl) {
int label = dl.getLabel();
int prediction = rng.nextInt(2);
Modified: lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java (original)
+++ lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/utils/RandomRuleResults.java Mon Aug 24 20:16:37 2009
@@ -22,10 +22,13 @@
import java.util.HashMap;
import java.util.Map;
-public class RandomRuleResults {
+public final class RandomRuleResults {
private static final Map<Integer, CDFitness> results = new HashMap<Integer, CDFitness>();
+ private RandomRuleResults() {
+ }
+
public static synchronized void addResult(int ruleid, CDFitness fit) {
CDFitness f = results.get(ruleid);
if (f == null)
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Mon Aug 24 20:16:37 2009
@@ -89,7 +89,7 @@
FileSystem fs = FileSystem.get(path.toUri(), conf);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
- Writer writer = null;
+ Writer writer;
if (cmdLine.hasOption(outputOpt)) {
writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
} else {
@@ -102,11 +102,11 @@
sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
}
boolean countOnly = cmdLine.hasOption(countOpt);
- long count = 0;
Writable key = (Writable) reader.getKeyClass().newInstance();
Writable value = (Writable) reader.getValueClass().newInstance();
writer.append("Key class: ").append(String.valueOf(reader.getKeyClass())).append(" Value Class: ").append(String.valueOf(value.getClass())).append(StringUtil.LINE_SEP);
writer.flush();
+ long count = 0;
if (countOnly == false) {
while (reader.next(key, value)) {
writer.append("Key: ").append(String.valueOf(key));
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Mon Aug 24 20:16:37 2009
@@ -40,9 +40,7 @@
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
-import java.io.BufferedWriter;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
@@ -56,11 +54,13 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.regex.Pattern;
public final class ClusterDumper {
private static final Logger log = LoggerFactory.getLogger(ClusterDumper.class);
private static final String LINE_SEP = System.getProperty("line.separator");
+ private static final Pattern TAB_PATTERN = Pattern.compile("\t");
private ClusterDumper() {
}
@@ -110,14 +110,14 @@
JobClient client = new JobClient();
JobConf conf = new JobConf(Job.class);
client.setConf(conf);
- Map<String, List<String>> clusterIdToPoints = null;
+ Map<String, List<String>> clusterIdToPoints;
if (cmdLine.hasOption(pointsOpt)) {
//read in the points
clusterIdToPoints = readPoints(cmdLine.getValue(pointsOpt).toString(), conf);
} else {
clusterIdToPoints = Collections.emptyMap();
}
- Writer writer = null;
+ Writer writer;
if (cmdLine.hasOption(outputOpt)){
writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
} else {
@@ -229,12 +229,12 @@
result.add("dummyentry");
}
- String line = null;
+ String line;
while ((line = reader.readLine()) != null) {
if (line.startsWith("#")) {
continue;
}
- String[] tokens = line.split("\t");
+ String[] tokens = TAB_PATTERN.split(line);
if (tokens.length < 3) {
continue;
}
@@ -244,11 +244,11 @@
return result;
}
- class TermIndexWeight {
+ static class TermIndexWeight {
public int index = -1;
public double weight = 0;
- public TermIndexWeight(int index, double weight) {
+ TermIndexWeight(int index, double weight) {
this.index = index;
this.weight = weight;
}
@@ -261,7 +261,7 @@
Iterator<Vector.Element> iter = vector.iterateNonZero();
while (iter.hasNext()) {
Vector.Element elt = iter.next();
- vectorTerms.add(new ClusterDumper().new TermIndexWeight(elt.index(), elt.get()));
+ vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
}
// Sort results in reverse order (ie weight in descending order)
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/strings/StringUtil.java Mon Aug 24 20:16:37 2009
@@ -1,11 +1,23 @@
-package org.apache.mahout.utils.strings;
-
-
/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
+ * http://www.apache.org/licenses/LICENSE-2.0
*
- **/
-public class StringUtil {
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.utils.strings;
+
+public interface StringUtil {
- public static final String LINE_SEP = System.getProperty("line.separator");
+ String LINE_SEP = System.getProperty("line.separator");
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java Mon Aug 24 20:16:37 2009
@@ -31,10 +31,9 @@
* <p/>
* The key is any {@link org.apache.hadoop.io.Writable} and the value is a {@link org.apache.mahout.matrix.Vector}.
* It can handle any class that implements Vector as long as it has a no-arg constructor.
- *
- **/
+ */
public class SequenceFileVectorIterable implements VectorIterable {
- private SequenceFile.Reader reader;
+ private final SequenceFile.Reader reader;
private boolean transpose = false;
public SequenceFileVectorIterable(SequenceFile.Reader reader) {
@@ -58,8 +57,8 @@
}
public class SeqFileIterator implements Iterator<Vector> {
- private Writable key;
- private Writable value;
+ private final Writable key;
+ private final Writable value;
private SeqFileIterator() throws IllegalAccessException, InstantiationException {
if (transpose == false){
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TF.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,15 +15,11 @@
* limitations under the License.
*/
-
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.Similarity;
-
+package org.apache.mahout.utils.vectors;
/**
* {@link org.apache.mahout.utils.vectors.Weight} based on term frequency only
- *
- **/
+ */
public class TF implements Weight {
@Override
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TFIDF.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,15 +15,11 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
-
-/**
- *
- *
- **/
public class TFIDF implements Weight {
private Similarity sim = new DefaultSimilarity();
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermEntry.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,15 +15,12 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors;
-/**
- *
- *
- **/
public class TermEntry {
- public String term;
- public int termIdx;
- public int docFreq;
+ public final String term;
+ public final int termIdx;
+ public final int docFreq;
public TermEntry(String term, int termIdx, int docFreq) {
this.term = term;
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/TermInfo.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,12 +15,10 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors;
+
import java.util.Iterator;
-/**
- *
- *
- **/
public interface TermInfo {
int totalTerms(String field);
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Mon Aug 24 20:16:37 2009
@@ -46,8 +46,7 @@
* Can read in a {@link org.apache.hadoop.io.SequenceFile} of {@link org.apache.mahout.matrix.Vector}s
* and dump out the results using {@link org.apache.mahout.matrix.Vector#asFormatString()} to either the console
* or to a file.
- *
- **/
+ */
public final class VectorDumper {
private static final Logger log = LoggerFactory.getLogger(VectorDumper.class);
@@ -97,7 +96,7 @@
FileSystem fs = FileSystem.get(path.toUri(), conf);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
SequenceFileVectorIterable vectorIterable = new SequenceFileVectorIterable(reader, cmdLine.hasOption(vectorAsKeyOpt));
- Writer writer = null;
+ Writer writer;
if (cmdLine.hasOption(outputOpt)) {
writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
} else {
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorIterable.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,12 +15,9 @@
* limitations under the License.
*/
-import org.apache.mahout.matrix.Vector;
+package org.apache.mahout.utils.vectors;
+import org.apache.mahout.matrix.Vector;
-/**
- *
- *
- **/
public interface VectorIterable extends Iterable<Vector>{
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Weight.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,11 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors;
-/**
- *
- *
- **/
public interface Weight {
/**
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.arff;
+
import java.util.Map;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
@@ -25,15 +26,14 @@
* An interface for representing an ARFFModel. Implementations can decide on the best approach
* for storing the model, as some approaches will be fine for smaller files, while larger
* ones may require a better implementation.
- *
- **/
+ */
public interface ARFFModel {
- public static final DateFormat DEFAULT_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
- public static final String ARFF_SPARSE = "{";//indicates the vector is sparse
- public static final String ARFF_COMMENT = "%";
- public static final String ATTRIBUTE = "@attribute";
- public static final String DATA = "@data";
- public static final String RELATION = "@relation";
+ DateFormat DEFAULT_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+ String ARFF_SPARSE = "{";//indicates the vector is sparse
+ String ARFF_COMMENT = "%";
+ String ATTRIBUTE = "@attribute";
+ String DATA = "@data";
+ String RELATION = "@relation";
String getRelation();
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java Mon Aug 24 20:16:37 2009
@@ -1,9 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.utils.vectors.arff;
public enum ARFFType {
NUMERIC("numeric"), NOMINAL("{"), DATE("date"), STRING("string");
- private String indicator;
+ private final String indicator;
ARFFType(String indicator) {
this.indicator = indicator;
}
@@ -12,8 +29,6 @@
return indicator;
}
-
-
public String getLabel(String line) {
int idx = line.indexOf(indicator);
return line.substring(ARFFModel.ATTRIBUTE.length(),
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.arff;
+
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.SparseVector;
import org.apache.mahout.matrix.Vector;
@@ -33,10 +34,11 @@
import java.text.SimpleDateFormat;
import java.text.DateFormat;
import java.util.Iterator;
+import java.util.regex.Pattern;
/**
- * Read in ARFF (http://www.cs.waikato.ac.nz/~ml/weka/arff.html) and create {@link org.apache.mahout.matrix.Vector}s
+ * Read in ARFF (http://www.cs.waikato.ac.nz/~ml/weka/arff.html) and create {@link Vector}s
* <p/>
* Attribute type handling:
* <ul>
@@ -54,9 +56,10 @@
*/
public class ARFFVectorIterable implements VectorIterable {
- protected BufferedReader buff;
- protected boolean inData;
- protected ARFFModel model;
+ private final BufferedReader buff;
+ private final ARFFModel model;
+ private static final Pattern COMMA_PATTERN = Pattern.compile(",");
+ private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
public ARFFVectorIterable(File file, ARFFModel model) throws IOException {
@@ -78,47 +81,47 @@
buff = new BufferedReader(reader);
}
//grab the attributes, then start the iterator at the first line of data
- String line = null;
- int labelNumber = 0;
- inData = false;
this.model = model;
+ int labelNumber = 0;
+ String line;
+ boolean inData = false;
while ((line = buff.readLine()) != null) {
line = line.trim();
String lower = line.toLowerCase();
- ARFFType type;
- Integer labelNumInt = new Integer(labelNumber);
+ Integer labelNumInt = labelNumber;
if (lower.startsWith(ARFFModel.ARFF_COMMENT)) {
continue;
} else if (lower.startsWith(ARFFModel.RELATION)) {
model.setRelation(line.substring(ARFFModel.RELATION.length()).trim());
} else if (lower.startsWith(ARFFModel.ATTRIBUTE)) {
String label;
- if (lower.indexOf(ARFFType.NUMERIC.getIndicator()) != -1) {
+ ARFFType type;
+ if (lower.contains(ARFFType.NUMERIC.getIndicator())) {
label = ARFFType.NUMERIC.getLabel(lower);
type = ARFFType.NUMERIC;
- } else if (lower.indexOf(ARFFType.STRING.getIndicator()) != -1) {
+ } else if (lower.contains(ARFFType.STRING.getIndicator())) {
label = ARFFType.STRING.getLabel(lower);
type = ARFFType.STRING;
//TODO: create a map so we know which
- } else if (lower.indexOf(ARFFType.NOMINAL.getIndicator()) != -1) {
+ } else if (lower.contains(ARFFType.NOMINAL.getIndicator())) {
label = ARFFType.NOMINAL.getLabel(lower);
type = ARFFType.NOMINAL;
//@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
int classIdx = lower.indexOf(ARFFType.NOMINAL.getIndicator());
- String [] classes = line.substring(classIdx + 1, line.length() - 1).split(",");
+ String [] classes = COMMA_PATTERN.split(line.substring(classIdx + 1, line.length() - 1));
for (int i = 0; i < classes.length; i++) {
model.addNominal(label, classes[i].trim(), i);
}
- } else if (lower.indexOf(ARFFType.DATE.getIndicator()) != -1) {
+ } else if (lower.contains(ARFFType.DATE.getIndicator())) {
label = ARFFType.DATE.getLabel(lower);
type = ARFFType.DATE;
//TODO: DateFormatter map
DateFormat format = ARFFModel.DEFAULT_DATE_FORMAT;
int idx = lower.indexOf(ARFFType.DATE.getIndicator());
- String[] split = line.split(" ");
+ String[] split = SPACE_PATTERN.split(line);
if (split.length >= 4) {//we have a date format
String formStr = line.substring(idx + ARFFType.DATE.getIndicator().length()).trim();
if (formStr.startsWith("\"")) {
@@ -161,7 +164,7 @@
try {
while ((line = buff.readLine()) != null) {
line = line.trim();
- if (line.equals("") == false && line.startsWith(ARFFModel.ARFF_COMMENT) == false) {
+ if (line.length() > 0 && line.startsWith(ARFFModel.ARFF_COMMENT) == false) {
break;
}
}
@@ -176,19 +179,19 @@
@Override
public Vector next() {
- Vector result = null;
+ Vector result;
if (line.startsWith(ARFFModel.ARFF_SPARSE)) {
line = line.substring(1, line.length() - 1);
- String[] splits = line.split(",");
+ String[] splits = COMMA_PATTERN.split(line);
result = new SparseVector(model.getLabelSize());
- for (int i = 0; i < splits.length; i++) {
- String[] data = splits[i].split(" ");//first is index, second is
+ for (String split : splits) {
+ String[] data = SPACE_PATTERN.split(split); // first is index, second is
int idx = Integer.parseInt(data[0]);
result.setQuick(idx, model.getValue(data[1], idx));
}
} else {
result = new DenseVector(model.getLabelSize());
- String[] splits = line.split(",");
+ String[] splits = COMMA_PATTERN.split(line);
for (int i = 0; i < splits.length; i++) {
result.setQuick(i, model.getValue(splits[i], i));
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.arff;
+
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -49,12 +50,11 @@
import java.util.Map;
-/**
- *
- *
- **/
public class Driver {
- private transient static Logger log = LoggerFactory.getLogger(Driver.class);
+ private static final Logger log = LoggerFactory.getLogger(Driver.class);
+
+ private Driver() {
+ }
public static void main(String[] args) throws IOException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
@@ -125,9 +125,7 @@
}
});
- for (int i = 0; i < files.length; i++) {
- File file = files[i];
-
+ for (File file : files) {
writeFile(outWriter, outDir, file, maxDocs, model);
}
} else {
@@ -154,7 +152,7 @@
ARFFModel model = new MapBackedARFFModel(arffModel.getWords(), arffModel.getWordCount() + 1,
arffModel.getNominalMap());
ARFFVectorIterable iteratable = new ARFFVectorIterable(file, model);
- String outFile = outDir + "/" + file.getName() + ".mvc";
+ String outFile = outDir + '/' + file.getName() + ".mvc";
VectorWriter vectorWriter;
if (outWriter != null) {
@@ -174,12 +172,11 @@
}
private static VectorWriter getSeqFileWriter(String outFile) throws IOException {
- VectorWriter sfWriter;
Path path = new Path(outFile);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class, SparseVector.class);
- sfWriter = new SequenceFileVectorWriter(seqWriter);
+ VectorWriter sfWriter = new SequenceFileVectorWriter(seqWriter);
return sfWriter;
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,29 +15,34 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.arff;
+
import java.text.DateFormat;
import java.text.ParseException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Collections;
+import java.util.regex.Pattern;
/**
- * Holds ARFF information in {@link java.util.Map}.
+ * Holds ARFF information in {@link Map}.
*/
public class MapBackedARFFModel implements ARFFModel {
- protected long wordCount = 1;
+ private static final Pattern QUOTE_PATTERN = Pattern.compile("\"");
- protected String relation;
+ private long wordCount = 1;
- private Map<String, Integer> labelBindings;
- private Map<Integer, String> idxLabel;
- private Map<Integer, ARFFType> typeMap; //key is the vector index, value is the type
- private Map<Integer, DateFormat> dateMap;
- private Map<String, Map<String, Integer>> nominalMap;
- private Map<String, Long> words;
+ private String relation;
+
+ private final Map<String, Integer> labelBindings;
+ private final Map<Integer, String> idxLabel;
+ private final Map<Integer, ARFFType> typeMap; //key is the vector index, value is the type
+ private final Map<Integer, DateFormat> dateMap;
+ private final Map<String, Map<String, Integer>> nominalMap;
+ private final Map<String, Long> words;
public MapBackedARFFModel() {
this(new HashMap<String, Long>(), 1, new HashMap<String, Map<String, Integer>>());
@@ -55,10 +59,12 @@
}
+ @Override
public String getRelation() {
return relation;
}
+ @Override
public void setRelation(String relation) {
this.relation = relation;
}
@@ -70,11 +76,12 @@
* @param idx The position in the ARFF data
* @return A double representing the data
*/
+ @Override
public double getValue(String data, int idx) {
- double result = 0;
ARFFType type = typeMap.get(idx);
- data = data.replaceAll("\"", "");
+ data = QUOTE_PATTERN.matcher(data).replaceAll("");
data = data.trim();
+ double result = 0.0;
switch (type) {
case NUMERIC: {
result = processNumeric(data);
@@ -125,31 +132,31 @@
*/
//Not sure how scalable this is going to be
protected double processString(String data) {
- double result;
- data = data.replaceAll("\"", "");
+ data = QUOTE_PATTERN.matcher(data).replaceAll("");
//map it to a long
Long theLong = words.get(data);
if (theLong == null) {
theLong = wordCount++;
words.put(data, theLong);
}
- result = theLong;
- return result;
+ return theLong;
}
- protected double processNumeric(String data) {
+ protected static double processNumeric(String data) {
return Double.parseDouble(data);
}
protected double processDate(String data, int idx) {
- double result;
DateFormat format = dateMap.get(idx);
if (format == null) {
format = DEFAULT_DATE_FORMAT;
}
- Date date = null;
+ double result;
try {
- date = format.parse(data);
+ Date date;
+ synchronized (format) {
+ date = format.parse(data);
+ }
result = date.getTime();// hmmm, what kind of loss casting long to double?
} catch (ParseException e) {
throw new RuntimeException(e);
@@ -161,6 +168,7 @@
* The vector attributes (labels in Mahout speak), unmodifiable
* @return the map
*/
+ @Override
public Map<String, Integer> getLabelBindings() {
return Collections.unmodifiableMap(labelBindings);
}
@@ -185,6 +193,7 @@
* Map nominals to ids. Should only be modified by calling {@link ARFFModel#addNominal(String, String, int)}
* @return the map
*/
+ @Override
public Map<String, Map<String, Integer>> getNominalMap() {
return nominalMap;
}
@@ -193,14 +202,17 @@
* Immutable map of words to the long id used for those words
* @return The map
*/
+ @Override
public Map<String, Long> getWords() {
return words;
}
+ @Override
public Integer getNominalValue(String label, String nominal){
return nominalMap.get(label).get(nominal);
}
+ @Override
public void addNominal(String label, String nominal, int idx) {
Map<String, Integer> noms = nominalMap.get(label);
if (noms == null) {
@@ -210,27 +222,33 @@
noms.put(nominal, idx);
}
+ @Override
public DateFormat getDateFormat(Integer idx){
return dateMap.get(idx);
}
+ @Override
public void addDateFormat(Integer idx, DateFormat format) {
dateMap.put(idx, format);
}
+ @Override
public Integer getLabelIndex(String label){
return labelBindings.get(label);
}
+ @Override
public void addLabel(String label, Integer idx) {
labelBindings.put(label, idx);
idxLabel.put(idx, label);
}
+ @Override
public ARFFType getARFFType(Integer idx){
return typeMap.get(idx);
}
+ @Override
public void addType(Integer idx, ARFFType type) {
typeMap.put(idx, type);
}
@@ -239,10 +257,12 @@
* The count of the number of words seen
* @return the count
*/
+ @Override
public long getWordCount() {
return wordCount;
}
+ @Override
public int getLabelSize() {
return labelBindings.size();
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterTermInfoWriter.java Mon Aug 24 20:16:37 2009
@@ -27,13 +27,12 @@
/**
* Write the TermInfo out to a {@link java.io.Writer}
- *
- **/
+ */
public class JWriterTermInfoWriter implements TermInfoWriter {
- protected Writer writer;
- protected String delimiter;
- protected String field;
+ private final Writer writer;
+ private final String delimiter;
+ private final String field;
public JWriterTermInfoWriter(Writer writer, String delimiter, String field) {
this.writer = writer;
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java Mon Aug 24 20:16:37 2009
@@ -24,7 +24,7 @@
import java.io.Writer;
public class JWriterVectorWriter implements VectorWriter {
- protected Writer writer;
+ private final Writer writer;
public JWriterVectorWriter(Writer writer) {
this.writer = writer;
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java Mon Aug 24 20:16:37 2009
@@ -27,10 +27,9 @@
/**
* Closes the writer when done
- *
- **/
+ */
public class SequenceFileVectorWriter implements VectorWriter {
- protected SequenceFile.Writer writer;
+ private final SequenceFile.Writer writer;
public SequenceFileVectorWriter(SequenceFile.Writer writer) {
this.writer = writer;
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.lucene;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.Term;
@@ -24,27 +25,26 @@
import java.util.Map;
import java.util.Iterator;
-import java.util.HashMap;
import java.util.LinkedHashMap;
import java.io.IOException;
/**
* Caches TermEntries from a single field. Materializes all values in the TermEnum to memory (much like FieldCache)
- *
- **/
+ */
public class CachedTermInfo implements TermInfo {
- Map<String, TermEntry> termEntries;
- String field;
+ private final Map<String, TermEntry> termEntries;
+ private final String field;
+
public CachedTermInfo(IndexReader reader, String field, int minDf, int maxDfPercent) throws IOException {
this.field = field;
TermEnum te = reader.terms(new Term(field, ""));
- int count = 0;
int numDocs = reader.numDocs();
double percent = numDocs * maxDfPercent / 100.0;
//Should we use a linked hash map so that we know terms are in order?
termEntries = new LinkedHashMap<String, TermEntry>();
+ int count = 0;
do {
Term term = te.term();
if (term == null || term.field().equals(field) == false){
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.lucene;
+
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -53,13 +54,11 @@
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
-
-/**
- *
- *
- **/
public class Driver {
- private transient static Logger log = LoggerFactory.getLogger(Driver.class);
+ private static final Logger log = LoggerFactory.getLogger(Driver.class);
+
+ private Driver() {
+ }
public static void main(String[] args) throws IOException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
@@ -139,7 +138,7 @@
}
Directory dir = FSDirectory.open(file);
IndexReader reader = IndexReader.open(dir, true);
- Weight weight = null;
+ Weight weight;
if (cmdLine.hasOption(weightOpt)) {
String wString = cmdLine.getValue(weightOpt).toString();
if (wString.equalsIgnoreCase("tf")) {
@@ -163,11 +162,9 @@
}
TermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);
VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
- LuceneIterable iterable = null;
- String power = null;
double norm = -1;
if (cmdLine.hasOption(powerOpt)) {
- power = cmdLine.getValue(powerOpt).toString();
+ String power = cmdLine.getValue(powerOpt).toString();
if (power.equals("INF")) {
norm = Double.POSITIVE_INFINITY;
} else {
@@ -178,6 +175,7 @@
if (cmdLine.hasOption(idFieldOpt)) {
idField = cmdLine.getValue(idFieldOpt).toString();
}
+ LuceneIterable iterable;
if (norm == LuceneIterable.NO_NORMALIZING) {
iterable = new LuceneIterable(reader, idField, field, mapper, LuceneIterable.NO_NORMALIZING);
} else {
@@ -221,15 +219,13 @@
}
private static VectorWriter getSeqFileWriter(String outFile) throws IOException {
- VectorWriter sfWriter;
Path path = new Path(outFile);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
//TODO: Make this parameter driven
SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class, SparseVector.class);
- sfWriter = new SequenceFileVectorWriter(seqWriter);
- return sfWriter;
+ return new SequenceFileVectorWriter(seqWriter);
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.lucene;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.document.FieldSelector;
@@ -27,14 +28,8 @@
import java.util.Iterator;
import java.util.Collections;
-
-/**
- *
- *
- **/
public class LuceneIterable implements VectorIterable {
-
private IndexReader indexReader;
private String field;
private String idField;
@@ -80,7 +75,7 @@
}
private class TDIterator implements Iterator<Vector> {
- private TermDocs termDocs;
+ private final TermDocs termDocs;
private TDIterator() throws IOException {
//term docs(null) is a better way of iterating all the docs in Lucene
@@ -98,7 +93,7 @@
@Override
public Vector next() {
- Vector result = null;
+ Vector result;
int doc = termDocs.doc();
//
try {
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/TFDFMapper.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.lucene;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.mahout.matrix.SparseVector;
@@ -32,14 +33,14 @@
public static final int DEFAULT_CACHE_SIZE = 256;
- protected IndexReader reader;
- protected Vector vector;
+ private final IndexReader reader; // TODO never used?
+ private Vector vector;
- protected Weight weight;
- protected int numTerms;
- protected TermInfo termInfo;
+ private final Weight weight;
+ private int numTerms;
+ private final TermInfo termInfo;
private String field;
- private int numDocs;
+ private final int numDocs;
public TFDFMapper(IndexReader reader, Weight weight, TermInfo termInfo) {
this.reader = reader;
@@ -48,6 +49,7 @@
this.numDocs = reader.numDocs();
}
+ @Override
public Vector getVector() {
return vector;
}
@@ -62,8 +64,9 @@
@Override
public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
TermEntry entry = termInfo.getTermEntry(field, term);
- if(entry != null)
+ if (entry != null) {
vector.setQuick(entry.termIdx, weight.calculate(frequency, entry.docFreq, numTerms, numDocs));
+ }
}
@Override
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/VectorMapper.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,14 +15,14 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.lucene;
+
import org.apache.lucene.index.TermVectorMapper;
import org.apache.mahout.matrix.Vector;
-
/**
* Not thread-safe
- *
- **/
+ */
public abstract class VectorMapper extends TermVectorMapper {
/**
* Can be called after the TermVector has been mapped
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java Mon Aug 24 20:16:37 2009
@@ -27,10 +27,10 @@
public class RandomVectorIterable implements VectorIterable{
- int numItems = 100;
- public static enum VectorType {DENSE, SPARSE};
+ private int numItems = 100;
+ public enum VectorType {DENSE, SPARSE}
- VectorType type = VectorType.SPARSE;
+ private VectorType type = VectorType.SPARSE;
public RandomVectorIterable() {
}
@@ -51,7 +51,7 @@
private class VectIterator implements Iterator<Vector>{
int count = 0;
- Random random = new Random();
+ final Random random = new Random();
@Override
public boolean hasNext() {
return count < numItems;
@@ -59,7 +59,7 @@
@Override
public Vector next() {
- Vector result = type.equals(VectorType.SPARSE) ? new SparseVector(numItems) : new DenseVector(numItems);
+ Vector result = type == VectorType.SPARSE ? new SparseVector(numItems) : new DenseVector(numItems);
result.assign(new UnaryFunction(){
@Override
public double apply(double arg1) {
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.arff;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.arff;
+
import junit.framework.TestCase;
import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.SparseVector;
@@ -26,11 +27,6 @@
import java.util.Iterator;
import java.util.Map;
-
-/**
- *
- *
- **/
public class ARFFVectorIterableTest extends TestCase {
public void testValues() throws Exception {
@@ -47,30 +43,30 @@
.append("{0 5,1 23}").append(StringUtil.LINE_SEP);
ARFFModel model = new MapBackedARFFModel();
ARFFVectorIterable iterable = new ARFFVectorIterable(builder.toString(), model);
- assertTrue(iterable.getModel().getRelation() + " is not equal to " + "Mahout", iterable.getModel().getRelation().equals("Mahout") == true);
+ assertEquals("Mahout", iterable.getModel().getRelation());
Map<String, Integer> bindings = iterable.getModel().getLabelBindings();
assertNotNull(bindings);
- assertTrue("bindings Size: " + bindings.size() + " is not: " + 5, bindings.size() == 5);
+ assertEquals(5, bindings.size());
Iterator<Vector> iter = iterable.iterator();
assertTrue(iter.hasNext());
Vector next = iter.next();
assertNotNull(next);
assertTrue("Wrong instanceof", next instanceof DenseVector);
- assertEquals("", next.get(0), 1.0);
- assertEquals("", next.get(1), 2.0);
+ assertEquals(1.0, next.get(0));
+ assertEquals(2.0, next.get(1));
assertTrue(iter.hasNext());
next = iter.next();
assertNotNull(next);
assertTrue("Wrong instanceof", next instanceof DenseVector);
- assertEquals("", next.get(0), 2.0);
- assertEquals("", next.get(1), 3.0);
+ assertEquals(2.0, next.get(0));
+ assertEquals(3.0, next.get(1));
assertTrue(iter.hasNext());
next = iter.next();
assertNotNull(next);
assertTrue("Wrong instanceof", next instanceof SparseVector);
- assertEquals("", next.get(0), 5.0);
- assertEquals("", next.get(1), 23.0);
+ assertEquals(5.0, next.get(0));
+ assertEquals(23.0, next.get(1));
assertFalse(iter.hasNext());
}
@@ -83,7 +79,7 @@
assertTrue("Vector is not dense", vector instanceof DenseVector);
count++;
}
- assertTrue(count + " does not equal: " + 10, count == 10);
+ assertEquals(10, count);
}
public void testSparse() throws Exception {
@@ -94,7 +90,7 @@
assertTrue("Vector is not dense", vector instanceof SparseVector);
count++;
}
- assertTrue(count + " does not equal: " + 10, count == 10);
+ assertEquals(10, count);
}
public void testNonNumeric() throws Exception {
@@ -106,23 +102,23 @@
assertTrue("Vector is not dense", vector instanceof SparseVector);
count++;
}
- assertTrue(count + " does not equal: " + 10, count == 10);
+ assertEquals(10, count);
Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
assertNotNull(nominalMap);
- assertTrue("nominalMap Size: " + nominalMap.size() + " is not: " + 1, nominalMap.size() == 1);
+ assertEquals(1, nominalMap.size());
Map<String, Integer> noms = nominalMap.get("bar");
assertNotNull("nominals for bar are null", noms);
- assertTrue("noms Size: " + noms.size() + " is not: " + 2, noms.size() == 2);
+ assertEquals(2, noms.size());
Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
assertNotNull("Type map null", integerARFFTypeMap);
- assertTrue("integerARFFTypeMap Size: " + integerARFFTypeMap.size() + " is not: " + 5, integerARFFTypeMap.size() == 5);
+ assertEquals(5, integerARFFTypeMap.size());
Map<String, Long> words = model.getWords();
assertNotNull("words null", words);
- assertTrue("words Size: " + words.size() + " is not: " + 10, words.size() == 10);
+ assertEquals(10, words.size());
System.out.println("Words: " + words);
Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
assertNotNull("date format null", integerDateFormatMap);
- assertTrue("integerDateFormatMap Size: " + integerDateFormatMap.size() + " is not: " + 1, integerDateFormatMap.size() == 1);
+ assertEquals(1, integerDateFormatMap.size());
}
@@ -134,23 +130,23 @@
assertTrue("Vector is not dense", vector instanceof SparseVector);
count++;
}
- assertTrue(count + " does not equal: " + 10, count == 10);
+ assertEquals(10, count);
Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
assertNotNull(nominalMap);
- assertTrue("nominalMap Size: " + nominalMap.size() + " is not: " + 1, nominalMap.size() == 1);
+ assertEquals(1, nominalMap.size());
Map<String, Integer> noms = nominalMap.get("bar");
assertNotNull("nominals for bar are null", noms);
- assertTrue("noms Size: " + noms.size() + " is not: " + 2, noms.size() == 2);
+ assertEquals(2, noms.size());
Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
assertNotNull("Type map null", integerARFFTypeMap);
- assertTrue("integerARFFTypeMap Size: " + integerARFFTypeMap.size() + " is not: " + 5, integerARFFTypeMap.size() == 5);
+ assertEquals(5, integerARFFTypeMap.size());
Map<String, Long> words = model.getWords();
assertNotNull("words null", words);
- assertTrue("words Size: " + words.size() + " is not: " + 10, words.size() == 10);
+ assertEquals(10, words.size());
System.out.println("Words: " + words);
Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
assertNotNull("date format null", integerDateFormatMap);
- assertTrue("integerDateFormatMap Size: " + integerDateFormatMap.size() + " is not: " + 1, integerDateFormatMap.size() == 1);
+ assertEquals(1, integerDateFormatMap.size());
model = new MapBackedARFFModel(model.getWords(), model.getWordCount(),
model.getNominalMap());
iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF2, model);
@@ -161,26 +157,26 @@
}
nominalMap = model.getNominalMap();
assertNotNull(nominalMap);
- assertTrue("nominalMap Size: " + nominalMap.size() + " is not: " + 2, nominalMap.size() == 2);
+ assertEquals(2, nominalMap.size());
noms = nominalMap.get("test");
assertNotNull("nominals for bar are null", noms);
- assertTrue("noms Size: " + noms.size() + " is not: " + 2, noms.size() == 2);
+ assertEquals(2, noms.size());
}
- public static final String SAMPLE_DENSE_ARFF = " % Comments\n" +
+ private static final String SAMPLE_DENSE_ARFF = " % Comments\n" +
" % \n" +
" % Comments go here" +
" % \n" +
" @RELATION Mahout\n" +
- "\n" +
+ '\n' +
" @ATTRIBUTE foo NUMERIC\n" +
" @ATTRIBUTE bar NUMERIC\n" +
" @ATTRIBUTE hockey NUMERIC\n" +
" @ATTRIBUTE football NUMERIC\n" +
" \n" +
- "\n" +
- "\n" +
+ '\n' +
+ '\n' +
" @DATA\n" +
" 23.1,3.23,1.2,0.2\n" +
" 2.9,3.0,1.2,0.2\n" +
@@ -194,20 +190,20 @@
" 2.9,3.1,1.23,0.1\n";
- public static final String SAMPLE_SPARSE_ARFF = " % Comments\n" +
+ private static final String SAMPLE_SPARSE_ARFF = " % Comments\n" +
" % \n" +
" % Comments go here" +
" % \n" +
" @RELATION Mahout\n" +
- "\n" +
+ '\n' +
" @ATTRIBUTE foo NUMERIC\n" +
" @ATTRIBUTE bar NUMERIC\n" +
" @ATTRIBUTE hockey NUMERIC\n" +
" @ATTRIBUTE football NUMERIC\n" +
" @ATTRIBUTE tennis NUMERIC\n" +
" \n" +
- "\n" +
- "\n" +
+ '\n' +
+ '\n' +
" @DATA\n" +
" {1 23.1,2 3.23,3 1.2,4 0.2}\n" +
" {0 2.9}\n" +
@@ -220,20 +216,20 @@
" {1 2.2,2 2.94 0.2}\n" +
" {1 2.9,2 3.1}\n";
- public static final String NON_NUMERIC_ARFF = " % Comments\n" +
+ private static final String NON_NUMERIC_ARFF = " % Comments\n" +
" % \n" +
" % Comments go here" +
" % \n" +
" @RELATION Mahout\n" +
- "\n" +
+ '\n' +
" @ATTRIBUTE junk NUMERIC\n" +
" @ATTRIBUTE foo NUMERIC\n" +
" @ATTRIBUTE bar {c,d}\n" +
" @ATTRIBUTE hockey string\n" +
" @ATTRIBUTE football date \"yyyy-MM-dd\"\n" +
" \n" +
- "\n" +
- "\n" +
+ '\n' +
+ '\n' +
" @DATA\n" +
" {2 c,3 gretzky,4 1973-10-23}\n" +
" {1 2.9,2 d,3 orr,4 1973-11-23}\n" +
@@ -246,20 +242,20 @@
" {0 2.2,2 d,3 messier,4 2008-11-23}\n" +
" {2 c,3 roy,4 1973-10-13}\n";
- public static final String NON_NUMERIC_ARFF2 = " % Comments\n" +
+ private static final String NON_NUMERIC_ARFF2 = " % Comments\n" +
" % \n" +
" % Comments go here" +
" % \n" +
" @RELATION Mahout\n" +
- "\n" +
+ '\n' +
" @ATTRIBUTE junk NUMERIC\n" +
" @ATTRIBUTE foo NUMERIC\n" +
" @ATTRIBUTE test {f,z}\n" +
" @ATTRIBUTE hockey string\n" +
" @ATTRIBUTE football date \"yyyy-MM-dd\"\n" +
" \n" +
- "\n" +
- "\n" +
+ '\n' +
+ '\n' +
" @DATA\n" +
" {2 f,3 gretzky,4 1973-10-23}\n" +
" {1 2.9,2 z,3 orr,4 1973-11-23}\n" +
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=807361&r1=807360&r2=807361&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Mon Aug 24 20:16:37 2009
@@ -1,4 +1,3 @@
-package org.apache.mahout.utils.vectors.lucene;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,8 @@
* limitations under the License.
*/
+package org.apache.mahout.utils.vectors.lucene;
+
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexWriter;
@@ -30,12 +31,8 @@
import org.apache.mahout.matrix.Vector;
import org.apache.mahout.matrix.SparseVector;
-/**
- *
- *
- **/
public class LuceneIterableTest extends TestCase {
- protected RAMDirectory directory;
+ private RAMDirectory directory;
private static final String [] DOCS = {
"The quick red fox jumped over the lazy brown dogs.",
@@ -48,6 +45,7 @@
@Override
protected void setUp() throws Exception {
+ super.setUp();
directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
for (int i = 0; i < DOCS.length; i++){