You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2013/02/01 12:25:23 UTC
svn commit: r1441391 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/common/
core/src/main/java/org/apache/mahout/common/lucene/
core/src/main/java/org/apache/mahout/vectorizer/
core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/...
Author: gsingers
Date: Fri Feb 1 11:25:23 2013
New Revision: 1441391
URL: http://svn.apache.org/viewvc?rev=1441391&view=rev
Log:
MAHOUT-1145: style fixes
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java
mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Fri Feb 1 11:25:23 2013
@@ -59,7 +59,6 @@ import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.util.Version;
/**
* <p>Superclass of many Mahout Hadoop "jobs". A job drives configuration and launch of one or
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java Fri Feb 1 11:25:23 2013
@@ -30,16 +30,15 @@ public class AnalyzerUtils {
/**
* Create an Analyzer using the latest {@link org.apache.lucene.util.Version}. Note, if you need to pass in parameters
* to your constructor, you will need to wrap it in an implementation that does not take any arguments
- * @param analyzerClassName
- * @return
- * @throws ClassNotFoundException
+ * @param analyzerClassName - Lucene Analyzer Name
+ * @return {@link Analyzer}
+ * @throws ClassNotFoundException - {@link ClassNotFoundException}
*/
public static Analyzer createAnalyzer(String analyzerClassName) throws ClassNotFoundException {
return createAnalyzer(analyzerClassName, Version.LUCENE_41);
}
public static Analyzer createAnalyzer(String analyzerClassName, Version version) throws ClassNotFoundException {
- Analyzer analyzer = null;
Class<? extends Analyzer> analyzerClass = Class.forName(analyzerClassName).asSubclass(Analyzer.class);
//TODO: GSI: Not sure I like this, many analyzers in Lucene take in the version
@@ -50,14 +49,14 @@ public class AnalyzerUtils {
* Create an Analyzer using the latest {@link org.apache.lucene.util.Version}. Note, if you need to pass in parameters
* to your constructor, you will need to wrap it in an implementation that does not take any arguments
* @param analyzerClass The Analyzer Class to instantiate
- * @return
+ * @return {@link Analyzer}
*/
public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass){
return createAnalyzer(analyzerClass, Version.LUCENE_41);
}
public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version){
- Analyzer analyzer = null;
+ Analyzer analyzer;
if (analyzerClass == StandardAnalyzer.class) {
Class<?>[] params = new Class<?>[1];
params[0] = Version.class;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java Fri Feb 1 11:25:23 2013
@@ -21,7 +21,6 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.util.Version;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.lucene.AnalyzerUtils;
import org.apache.mahout.math.NamedVector;
@@ -45,7 +44,7 @@ public class EncodingMapper extends Mapp
public static final String ENCODER_FIELD_NAME = "encoderFieldName";
public static final String ENCODER_CLASS = "encoderClass";
public static final String CARDINALITY = "cardinality";
- private boolean sequentialVecs;
+ private boolean sequentialVectors;
private boolean namedVectors;
private FeatureVectorEncoder encoder;
private int cardinality;
@@ -53,10 +52,10 @@ public class EncodingMapper extends Mapp
@Override
protected void setup(Context context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
- sequentialVecs = conf.getBoolean(USE_SEQUENTIAL, false);
+ sequentialVectors = conf.getBoolean(USE_SEQUENTIAL, false);
namedVectors = conf.getBoolean(USE_NAMED_VECTORS, false);
String analyzerName = conf.get(ANALYZER_NAME, StandardAnalyzer.class.getName());
- Analyzer analyzer = null;
+ Analyzer analyzer;
try {
analyzer = AnalyzerUtils.createAnalyzer(analyzerName);
} catch (ClassNotFoundException e) {
@@ -79,7 +78,7 @@ public class EncodingMapper extends Mapp
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
Vector vector;
- if (sequentialVecs) {
+ if (sequentialVectors) {
vector = new SequentialAccessSparseVector(cardinality);
} else {
vector = new RandomAccessSparseVector(cardinality);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Fri Feb 1 11:25:23 2013
@@ -33,7 +33,6 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java Fri Feb 1 11:25:23 2013
@@ -35,7 +35,6 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.lucene.AnalyzerUtils;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java Fri Feb 1 11:25:23 2013
@@ -28,8 +28,6 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
-import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.lucene.AnalyzerUtils;
import org.apache.mahout.vectorizer.DocumentProcessor;
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java Fri Feb 1 11:25:23 2013
@@ -17,18 +17,11 @@
package org.apache.mahout.vectorizer.collocations.llr;
-import java.io.Reader;
-import java.util.Collections;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.util.Version;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.vectorizer.collocations.llr.Gram.Type;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java Fri Feb 1 11:25:23 2013
@@ -19,10 +19,8 @@ package org.apache.mahout.text;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
-import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java Fri Feb 1 11:25:23 2013
@@ -18,7 +18,6 @@
package org.apache.mahout.text.wikipedia;
import java.io.Reader;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java Fri Feb 1 11:25:23 2013
@@ -19,7 +19,6 @@ package org.apache.mahout.utils.vectors.
import com.google.common.base.Preconditions;
import com.google.common.collect.AbstractIterator;
-import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -51,11 +50,8 @@ public final class LuceneIterator extend
private final Set<String> idFieldSelector;
private final TermInfo terminfo;
private final double normPower;
- private DocsEnum termDocs = null;
-
-
- private int nextDocid;
+ private int nextDocId;
private int numErrorDocs = 0;
private int maxErrorDocs = 0;
@@ -70,7 +66,10 @@ public final class LuceneIterator extend
* @param indexReader {@link IndexReader} to read the documents from.
* @param idField field containing the id. May be null.
* @param field field to use for the Vector
+ * @param terminfo terminfo
+ * @param weight weight
* @param normPower the normalization value. Must be nonnegative, or {@link LuceneIterable#NO_NORMALIZING}
+ * @throws java.io.IOException - {@link java.io.IOException}
*/
public LuceneIterator(IndexReader indexReader,
String idField,
@@ -82,8 +81,15 @@ public final class LuceneIterator extend
}
/**
+ * @param indexReader {@link IndexReader} to read the documents from.
+ * @param idField field containing the id. May be null.
+ * @param field field to use for the Vector
+ * @param terminfo terminfo
+ * @param weight weight
+ * @param normPower the normalization value. Must be nonnegative, or {@link LuceneIterable#NO_NORMALIZING}
* @param maxPercentErrorDocs most documents that will be tolerated without a term freq vector. In [0,1].
* @see #LuceneIterator(org.apache.lucene.index.IndexReader, String, String, org.apache.mahout.utils.vectors.TermInfo, org.apache.mahout.vectorizer.Weight, double)
+ * @throws java.io.IOException - {@link java.io.IOException}
*/
public LuceneIterator(IndexReader indexReader,
String idField,
@@ -97,7 +103,7 @@ public final class LuceneIterator extend
"If specified normPower must be nonnegative", normPower);
Preconditions.checkArgument(maxPercentErrorDocs >= 0.0 && maxPercentErrorDocs <= 1.0);
if (idField != null) {
- idFieldSelector = new TreeSet();
+ idFieldSelector = new TreeSet<String>();
idFieldSelector.add(idField);
} else {
idFieldSelector = null; /*The field in the index containing the index. If
@@ -114,7 +120,7 @@ public final class LuceneIterator extend
this.terminfo = terminfo;
this.normPower = normPower;
this.weight = weight;
- this.nextDocid = 0;
+ this.nextDocId = 0;
this.maxErrorDocs = (int) (maxPercentErrorDocs * indexReader.numDocs());
}
@@ -125,8 +131,8 @@ public final class LuceneIterator extend
Terms termFreqVector;
do {
- doc = this.nextDocid;
- nextDocid++;
+ doc = this.nextDocId;
+ nextDocId++;
if (doc >= indexReader.maxDoc()) {
return endOfData();
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java Fri Feb 1 11:25:23 2013
@@ -13,11 +13,9 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.mahout.utils.MahoutTestCase;
-import org.apache.mahout.utils.vectors.TermEntry;
import org.junit.Test;
import java.io.IOException;
-import java.util.Iterator;
/**
*
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Fri Feb 1 11:25:23 2013
@@ -84,7 +84,7 @@ public final class LuceneIterableTest ex
}
@Test(expected = IllegalStateException.class)
- public void testIterable_noTermVectors() throws IOException {
+ public void testIterableNoTermVectors() throws IOException {
RAMDirectory directory = createTestIndex(Field.TermVector.NO);
IndexReader reader = DirectoryReader.open(directory);
@@ -99,7 +99,7 @@ public final class LuceneIterableTest ex
}
@Test
- public void testIterable_someNoiseTermVectors() throws IOException {
+ public void testIterableSomeNoiseTermVectors() throws IOException {
//get noise vectors
RAMDirectory directory = createTestIndex(Field.TermVector.YES, new RAMDirectory(), true, 0);
//get real vectors