You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2013/02/01 12:25:23 UTC

svn commit: r1441391 - in /mahout/trunk: core/src/main/java/org/apache/mahout/common/ core/src/main/java/org/apache/mahout/common/lucene/ core/src/main/java/org/apache/mahout/vectorizer/ core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/...

Author: gsingers
Date: Fri Feb  1 11:25:23 2013
New Revision: 1441391

URL: http://svn.apache.org/viewvc?rev=1441391&view=rev
Log:
MAHOUT-1145: style fixes

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Fri Feb  1 11:25:23 2013
@@ -59,7 +59,6 @@ import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Preconditions;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.util.Version;
 
 /**
  * <p>Superclass of many Mahout Hadoop "jobs". A job drives configuration and launch of one or

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java Fri Feb  1 11:25:23 2013
@@ -30,16 +30,15 @@ public class AnalyzerUtils {
   /**
    * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}.  Note, if you need to pass in parameters
    * to your constructor, you will need to wrap it in an implementation that does not take any arguments
-   * @param analyzerClassName
-   * @return
-   * @throws ClassNotFoundException
+   * @param analyzerClassName - Lucene Analyzer Name
+   * @return {@link Analyzer}
+   * @throws ClassNotFoundException - {@link ClassNotFoundException}
    */
   public static Analyzer createAnalyzer(String analyzerClassName) throws ClassNotFoundException {
     return createAnalyzer(analyzerClassName, Version.LUCENE_41);
   }
 
   public static Analyzer createAnalyzer(String analyzerClassName, Version version) throws ClassNotFoundException {
-    Analyzer analyzer = null;
     Class<? extends Analyzer> analyzerClass = Class.forName(analyzerClassName).asSubclass(Analyzer.class);
     //TODO: GSI: Not sure I like this, many analyzers in Lucene take in the version
 
@@ -50,14 +49,14 @@ public class AnalyzerUtils {
    * Create an Analyzer using the latest {@link org.apache.lucene.util.Version}.  Note, if you need to pass in parameters
    * to your constructor, you will need to wrap it in an implementation that does not take any arguments
    * @param analyzerClass The Analyzer Class to instantiate
-   * @return
+   * @return {@link Analyzer}
    */
   public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass){
     return createAnalyzer(analyzerClass, Version.LUCENE_41);
   }
 
   public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version){
-    Analyzer analyzer = null;
+    Analyzer analyzer;
     if (analyzerClass == StandardAnalyzer.class) {
       Class<?>[] params = new Class<?>[1];
       params[0] = Version.class;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java Fri Feb  1 11:25:23 2013
@@ -21,7 +21,6 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.util.Version;
 import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.lucene.AnalyzerUtils;
 import org.apache.mahout.math.NamedVector;
@@ -45,7 +44,7 @@ public class EncodingMapper extends Mapp
   public static final String ENCODER_FIELD_NAME = "encoderFieldName";
   public static final String ENCODER_CLASS = "encoderClass";
   public static final String CARDINALITY = "cardinality";
-  private boolean sequentialVecs;
+  private boolean sequentialVectors;
   private boolean namedVectors;
   private FeatureVectorEncoder encoder;
   private int cardinality;
@@ -53,10 +52,10 @@ public class EncodingMapper extends Mapp
   @Override
   protected void setup(Context context) throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
-    sequentialVecs = conf.getBoolean(USE_SEQUENTIAL, false);
+    sequentialVectors = conf.getBoolean(USE_SEQUENTIAL, false);
     namedVectors = conf.getBoolean(USE_NAMED_VECTORS, false);
     String analyzerName = conf.get(ANALYZER_NAME, StandardAnalyzer.class.getName());
-    Analyzer analyzer = null;
+    Analyzer analyzer;
     try {
       analyzer = AnalyzerUtils.createAnalyzer(analyzerName);
     } catch (ClassNotFoundException e) {
@@ -79,7 +78,7 @@ public class EncodingMapper extends Mapp
   @Override
   protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
     Vector vector;
-    if (sequentialVecs) {
+    if (sequentialVectors) {
       vector = new SequentialAccessSparseVector(cardinality);
     } else {
       vector = new RandomAccessSparseVector(cardinality);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Fri Feb  1 11:25:23 2013
@@ -33,7 +33,6 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.Pair;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java Fri Feb  1 11:25:23 2013
@@ -35,7 +35,6 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.lucene.AnalyzerUtils;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java Fri Feb  1 11:25:23 2013
@@ -28,8 +28,6 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.Version;
-import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.StringTuple;
 import org.apache.mahout.common.lucene.AnalyzerUtils;
 import org.apache.mahout.vectorizer.DocumentProcessor;

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java Fri Feb  1 11:25:23 2013
@@ -17,18 +17,11 @@
 
 package org.apache.mahout.vectorizer.collocations.llr;
 
-import java.io.Reader;
-import java.util.Collections;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.Counters.Counter;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.util.Version;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.StringTuple;
 import org.apache.mahout.vectorizer.collocations.llr.Gram.Type;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java Fri Feb  1 11:25:23 2013
@@ -19,10 +19,8 @@ package org.apache.mahout.text;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
-import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java Fri Feb  1 11:25:23 2013
@@ -18,7 +18,6 @@
 package org.apache.mahout.text.wikipedia;
 
 import java.io.Reader;
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java Fri Feb  1 11:25:23 2013
@@ -19,7 +19,6 @@ package org.apache.mahout.utils.vectors.
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.AbstractIterator;
-import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -51,11 +50,8 @@ public final class LuceneIterator extend
   private final Set<String> idFieldSelector;
   private final TermInfo terminfo;
   private final double normPower;
-  private DocsEnum termDocs = null;
-
-
-  private int nextDocid;
 
+  private int nextDocId;
 
   private int numErrorDocs = 0;
   private int maxErrorDocs = 0;
@@ -70,7 +66,10 @@ public final class LuceneIterator extend
    * @param indexReader {@link IndexReader} to read the documents from.
    * @param idField     field containing the id. May be null.
    * @param field       field to use for the Vector
+   * @param terminfo    terminfo
+   * @param weight      weight
    * @param normPower   the normalization value. Must be nonnegative, or {@link LuceneIterable#NO_NORMALIZING}
+   * @throws java.io.IOException - {@link java.io.IOException}
    */
   public LuceneIterator(IndexReader indexReader,
                         String idField,
@@ -82,8 +81,15 @@ public final class LuceneIterator extend
   }
 
   /**
+   * @param indexReader {@link IndexReader} to read the documents from.
+   * @param idField    field containing the id. May be null.
+   * @param field      field to use for the Vector
+   * @param terminfo   terminfo
+   * @param weight     weight
+   * @param normPower  the normalization value. Must be nonnegative, or {@link LuceneIterable#NO_NORMALIZING}
    * @param maxPercentErrorDocs most documents that will be tolerated without a term freq vector. In [0,1].
    * @see #LuceneIterator(org.apache.lucene.index.IndexReader, String, String, org.apache.mahout.utils.vectors.TermInfo, org.apache.mahout.vectorizer.Weight, double)
+   * @throws java.io.IOException - {@link java.io.IOException}
    */
   public LuceneIterator(IndexReader indexReader,
                         String idField,
@@ -97,7 +103,7 @@ public final class LuceneIterator extend
             "If specified normPower must be nonnegative", normPower);
     Preconditions.checkArgument(maxPercentErrorDocs >= 0.0 && maxPercentErrorDocs <= 1.0);
     if (idField != null) {
-      idFieldSelector = new TreeSet();
+      idFieldSelector = new TreeSet<String>();
       idFieldSelector.add(idField);
     } else {
       idFieldSelector = null; /*The field in the index  containing the index.  If
@@ -114,7 +120,7 @@ public final class LuceneIterator extend
     this.terminfo = terminfo;
     this.normPower = normPower;
     this.weight = weight;
-    this.nextDocid = 0;
+    this.nextDocId = 0;
     this.maxErrorDocs = (int) (maxPercentErrorDocs * indexReader.numDocs());
   }
 
@@ -125,8 +131,8 @@ public final class LuceneIterator extend
       Terms termFreqVector;
 
       do {
-        doc = this.nextDocid;
-        nextDocid++;
+        doc = this.nextDocId;
+        nextDocId++;
 
         if (doc >= indexReader.maxDoc()) {
           return endOfData();

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java Fri Feb  1 11:25:23 2013
@@ -13,11 +13,9 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Version;
 import org.apache.mahout.utils.MahoutTestCase;
-import org.apache.mahout.utils.vectors.TermEntry;
 import org.junit.Test;
 
 import java.io.IOException;
-import java.util.Iterator;
 
 /**
  *

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=1441391&r1=1441390&r2=1441391&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Fri Feb  1 11:25:23 2013
@@ -84,7 +84,7 @@ public final class LuceneIterableTest ex
   }
 
   @Test(expected = IllegalStateException.class)
-  public void testIterable_noTermVectors() throws IOException {
+  public void testIterableNoTermVectors() throws IOException {
     RAMDirectory directory = createTestIndex(Field.TermVector.NO);
     IndexReader reader = DirectoryReader.open(directory);
     
@@ -99,7 +99,7 @@ public final class LuceneIterableTest ex
   }
 
   @Test
-  public void testIterable_someNoiseTermVectors() throws IOException {
+  public void testIterableSomeNoiseTermVectors() throws IOException {
     //get noise vectors
     RAMDirectory directory = createTestIndex(Field.TermVector.YES, new RAMDirectory(), true, 0);
     //get real vectors