You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/07/17 17:06:03 UTC

svn commit: r1147646 - in /mahout/trunk: core/src/main/java/org/apache/mahout/classifier/naivebayes/ core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ core/src/main/java/org/apache/mahout/clustering/ core/src/main/java/org/apache/mah...

Author: srowen
Date: Sun Jul 17 15:06:01 2011
New Revision: 1147646

URL: http://svn.apache.org/viewvc?rev=1147646&view=rev
Log:
Style changes on MAHOUT-763 and new Pagerank code; mostly copyright header and simpler iteration over dirs of sequence files

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java Sun Jul 17 15:06:01 2011
@@ -34,16 +34,19 @@ import java.io.IOException;
 /** NaiveBayesModel holds the weight Matrix, the feature and label sums and the weight normalizer vectors.*/
 public class NaiveBayesModel {
 
-  private Vector weightsPerLabel;
-  private Vector perlabelThetaNormalizer;
-  private Vector weightsPerFeature;
-  private Matrix weightsPerLabelAndFeature;
-  private float alphaI;
-  private double numFeatures;
-  private double totalWeightSum;
-
-  public NaiveBayesModel(Matrix weightMatrix, Vector weightsPerFeature, Vector weightsPerLabel, Vector thetaNormalizer,
-      float alphaI) {
+  private final Vector weightsPerLabel;
+  private final Vector perlabelThetaNormalizer;
+  private final Vector weightsPerFeature;
+  private final Matrix weightsPerLabelAndFeature;
+  private final float alphaI;
+  private final double numFeatures;
+  private final double totalWeightSum;
+
+  public NaiveBayesModel(Matrix weightMatrix,
+                         Vector weightsPerFeature,
+                         Vector weightsPerLabel,
+                         Vector thetaNormalizer,
+                         float alphaI) {
     this.weightsPerLabelAndFeature = weightMatrix;
     this.weightsPerFeature = weightsPerFeature;
     this.weightsPerLabel = weightsPerLabel;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java Sun Jul 17 15:06:01 2011
@@ -22,12 +22,12 @@ import org.apache.mahout.math.Vector;
 
 public abstract class AbstractThetaTrainer {
 
-  private Vector weightsPerFeature;
-  private Vector weightsPerLabel;
-  private Vector perLabelThetaNormalizer;
-  private double alphaI;
-  private double totalWeightSum;
-  private double numFeatures;
+  private final Vector weightsPerFeature;
+  private final Vector weightsPerLabel;
+  private final Vector perLabelThetaNormalizer;
+  private final double alphaI;
+  private final double totalWeightSum;
+  private final double numFeatures;
 
   public AbstractThetaTrainer(Vector weightsPerFeature, Vector weightsPerLabel, double alphaI) {
     Preconditions.checkNotNull(weightsPerFeature);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java Sun Jul 17 15:06:01 2011
@@ -50,15 +50,11 @@ public class TrainUtils {
 
   static NaiveBayesModel readModelFromTempDir(Path base, Configuration conf) {
 
-    Vector scoresPerLabel = null;
-    Vector perlabelThetaNormalizer = null;
-    Vector scoresPerFeature = null;
-    Matrix scoresPerLabelAndFeature;
-    float alphaI;
-
-    alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);
+    float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);
 
     // read feature sums and label sums
+    Vector scoresPerLabel = null;
+    Vector scoresPerFeature = null;
     for (Pair<Text,VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
         new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
       String key = record.getFirst().toString();
@@ -73,12 +69,13 @@ public class TrainUtils {
     Preconditions.checkNotNull(scoresPerFeature);
     Preconditions.checkNotNull(scoresPerLabel);
 
-    scoresPerLabelAndFeature = new SparseMatrix(new int[] { scoresPerLabel.size(), scoresPerFeature.size() });
+    Matrix scoresPerLabelAndFeature = new SparseMatrix(new int[]{scoresPerLabel.size(), scoresPerFeature.size()});
     for (Pair<IntWritable,VectorWritable> entry : new SequenceFileDirIterable<IntWritable,VectorWritable>(
         new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(), conf)) {
       scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
     }
 
+    Vector perlabelThetaNormalizer = null;
     for (Pair<Text,VectorWritable> entry : new SequenceFileDirIterable<Text,VectorWritable>(
         new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) {
       if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.clustering;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.clustering;
  * limitations under the License.
  */
 
+package org.apache.mahout.clustering;
 
 import org.apache.hadoop.io.Text;
 import org.apache.mahout.math.Vector;
@@ -26,13 +26,9 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
-/**
- *
- *
- **/
 public class WeightedPropertyVectorWritable extends WeightedVectorWritable {
 
-  protected Map<Text, Text> properties;
+  private Map<Text, Text> properties;
 
   public WeightedPropertyVectorWritable() {
   }
@@ -82,13 +78,13 @@ public class WeightedPropertyVectorWrita
 
   public String toString() {
     Vector vector = getVector();
-    StringBuilder bldr = new StringBuilder("wt: ").append(weight);
-    if (properties != null && properties.isEmpty() == false) {
+    StringBuilder bldr = new StringBuilder("wt: ").append(getWeight());
+    if (properties != null && !properties.isEmpty()) {
       for (Map.Entry<Text, Text> entry : properties.entrySet()) {
         bldr.append(entry.getKey().toString()).append(": ").append(entry.getValue().toString()).append(' ');
       }
     }
-    bldr.append(" vec: ").append((vector == null ? "null" : AbstractCluster.formatVector(vector, null)));
+    bldr.append(" vec: ").append(vector == null ? "null" : AbstractCluster.formatVector(vector, null));
     return bldr.toString();
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java Sun Jul 17 15:06:01 2011
@@ -27,8 +27,8 @@ import org.apache.mahout.math.VectorWrit
 
 public class WeightedVectorWritable implements Writable {
 
-  protected VectorWritable vectorWritable = new VectorWritable();
-  protected double weight;
+  private final VectorWritable vectorWritable = new VectorWritable();
+  private double weight;
 
   public WeightedVectorWritable() {
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Sun Jul 17 15:06:01 2011
@@ -17,20 +17,16 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
-import java.io.IOException;
 import java.util.Collection;
 
-import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
 import org.apache.mahout.clustering.canopy.Canopy;
 import org.apache.mahout.clustering.kmeans.Cluster;
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
 
 final class FuzzyKMeansUtil {
 
@@ -38,40 +34,26 @@ final class FuzzyKMeansUtil {
   }
 
   /** Configure the mapper with the cluster info */
-  public static void configureWithClusterInfo(Path clusterPathStr, Collection<SoftCluster> clusters)
-    throws IOException {
-    // Get the path location where the cluster Info is stored
-    Configuration conf = new Configuration();
-    Path clusterPath = new Path(clusterPathStr, "*");
-    Collection<Path> result = Lists.newArrayList();
-
-    // get all filtered file names in result list
-    FileSystem fs = clusterPath.getFileSystem(conf);
-    FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(clusterPath, PathFilters.partFilter())),
-                                         PathFilters.partFilter());
-
-    for (FileStatus match : matches) {
-      result.add(fs.makeQualified(match.getPath()));
-    }
-
-    // iterate through the result path list
-    for (Path path : result) {
-      for (Writable value : new SequenceFileValueIterable<Writable>(path, conf)) {
-        Class<? extends Writable> valueClass = value.getClass();
-        if (valueClass.equals(Cluster.class)) {
-          // get the cluster info
-          Cluster cluster = (Cluster) value;
-          clusters.add(new SoftCluster(cluster.getCenter(), cluster.getId(), cluster.getMeasure()));
-        } else if (valueClass.equals(SoftCluster.class)) {
-          // get the cluster info
-          clusters.add((SoftCluster) value);
-        } else if (valueClass.equals(Canopy.class)) {
-          // get the cluster info
-          Canopy canopy = (Canopy) value;
-          clusters.add(new SoftCluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
-        } else {
-          throw new IllegalStateException("Bad value class: " + valueClass);
-        }
+  public static void configureWithClusterInfo(Path clusterPath, Collection<SoftCluster> clusters) {
+    for (Writable value :
+         new SequenceFileDirValueIterable<Writable>(clusterPath,
+                                                    PathType.LIST,
+                                                    PathFilters.partFilter(),
+                                                    new Configuration())) {
+      Class<? extends Writable> valueClass = value.getClass();
+      if (valueClass.equals(Cluster.class)) {
+        // get the cluster info
+        Cluster cluster = (Cluster) value;
+        clusters.add(new SoftCluster(cluster.getCenter(), cluster.getId(), cluster.getMeasure()));
+      } else if (valueClass.equals(SoftCluster.class)) {
+        // get the cluster info
+        clusters.add((SoftCluster) value);
+      } else if (valueClass.equals(Canopy.class)) {
+        // get the cluster info
+        Canopy canopy = (Canopy) value;
+        clusters.add(new SoftCluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
+      } else {
+        throw new IllegalStateException("Bad value class: " + valueClass);
       }
     }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Sun Jul 17 15:06:01 2011
@@ -27,7 +27,6 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.WeightedPropertyVectorWritable;
-import org.apache.mahout.clustering.WeightedVectorWritable;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.math.VectorWritable;
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java Sun Jul 17 15:06:01 2011
@@ -22,11 +22,9 @@ import java.util.List;
 import java.util.Map;
 
 import com.google.common.collect.Lists;
-import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.SequenceFile.Writer;
-import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.AbstractCluster;
 import org.apache.mahout.clustering.ClusterObservations;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Sun Jul 17 15:06:01 2011
@@ -17,19 +17,15 @@
 
 package org.apache.mahout.clustering.kmeans;
 
-import java.io.IOException;
 import java.util.Collection;
 
-import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
 import org.apache.mahout.clustering.canopy.Canopy;
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
 
 final class KMeansUtil {
 
@@ -38,36 +34,20 @@ final class KMeansUtil {
 
   /** Configure the mapper with the cluster info */
   public static void configureWithClusterInfo(Configuration conf,
-                                              Path clusterPathStr,
-                                              Collection<Cluster> clusters) throws IOException {
-
-    // Get the path location where the cluster Info is stored
-    Path clusterPath = new Path(clusterPathStr, "*");
-    Collection<Path> result = Lists.newArrayList();
-
-    // get all filtered file names in result list
-    FileSystem fs = clusterPath.getFileSystem(conf);
-    FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(clusterPath, PathFilters.partFilter())),
-                                         PathFilters.partFilter());
-
-    for (FileStatus match : matches) {
-      result.add(fs.makeQualified(match.getPath()));
-    }
-
-    // iterate through the result path list
-    for (Path path : result) {
-      for (Writable value : new SequenceFileValueIterable<Writable>(path, conf)) {
-        Class<? extends Writable> valueClass = value.getClass();
-        if (valueClass.equals(Cluster.class)) {
-          // get the cluster info
-          clusters.add((Cluster) value);
-        } else if (valueClass.equals(Canopy.class)) {
-          // get the cluster info
-          Canopy canopy = (Canopy) value;
-          clusters.add(new Cluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
-        } else {
-          throw new IllegalStateException("Bad value class: " + valueClass);
-        }
+                                              Path clusterPath,
+                                              Collection<Cluster> clusters) {
+    for (Writable value :
+         new SequenceFileDirValueIterable<Writable>(clusterPath, PathType.LIST, PathFilters.partFilter(), conf)) {
+      Class<? extends Writable> valueClass = value.getClass();
+      if (valueClass.equals(Cluster.class)) {
+        // get the cluster info
+        clusters.add((Cluster) value);
+      } else if (valueClass.equals(Canopy.class)) {
+        // get the cluster info
+        Canopy canopy = (Canopy) value;
+        clusters.add(new Cluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
+      } else {
+        throw new IllegalStateException("Bad value class: " + valueClass);
       }
     }
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java Sun Jul 17 15:06:01 2011
@@ -22,7 +22,6 @@ import com.google.common.base.Splitter;
 import java.util.List;
 import java.util.Locale;
 
-import com.google.common.base.Splitter;
 import com.google.common.collect.Lists;
 import org.apache.mahout.df.data.Dataset.Attribute;
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java Sun Jul 17 15:06:01 2011
@@ -31,7 +31,7 @@ import java.io.IOException;
 import java.io.InputStream;
 
 /** helper method for working with graphs */
-public class GraphUtils {
+public final class GraphUtils {
 
   private GraphUtils() {}
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java Sun Jul 17 15:06:01 2011
@@ -62,7 +62,7 @@ import org.apache.mahout.math.map.OpenLo
  *
  * <p>The input files need to be a {@link org.apache.hadoop.io.SequenceFile} with {@link Edge}s as keys and
  * any Writable as values and another {@link org.apache.hadoop.io.SequenceFile} with {@link IntWritable}s as keys and {@link Vertex} as
- * values, as produced by {@link org.apache.mahout.graph.common.GraphUtils.indexVertices())}</p>
+ * values, as produced by {@link org.apache.mahout.graph.common.GraphUtils#indexVertices(Configuration, Path, Path)}</p>
  *
  * <p>This job outputs text files with a vertex id and its pagerank per line.</p>
   *
@@ -105,7 +105,7 @@ public class PageRankJob extends Abstrac
     addOption("numIterations", "it", "number of numIterations", String.valueOf(5));
     addOption("teleportationProbability", "tp", "probability to teleport to a random vertex", String.valueOf(0.8));
 
-    Map<String, String> parsedArgs = super.parseArguments(args);
+    Map<String, String> parsedArgs = parseArguments(args);
 
     Path vertexIndex = new Path(parsedArgs.get("--vertexIndex"));
     Path edges = new Path(parsedArgs.get("--edges"));
@@ -116,7 +116,7 @@ public class PageRankJob extends Abstrac
 
     Preconditions.checkArgument(numVertices > 0);
     Preconditions.checkArgument(numIterations > 0);
-    Preconditions.checkArgument(teleportationProbability > 0 && teleportationProbability < +1);
+    Preconditions.checkArgument(teleportationProbability > 0.0 && teleportationProbability < 1.0);
 
     Job indexedDegrees = prepareJob(edges, getTempPath(TMP_INDEXED_DEGREES), SequenceFileInputFormat.class,
         IndexAndCountDegreeMapper.class, IntWritable.class, IntWritable.class, IntSumReducer.class, IntWritable.class,
@@ -245,7 +245,7 @@ public class PageRankJob extends Abstrac
       Vector vector = new RandomAccessSparseVector(numVertices);
       for (IntWritable incidentVertexIndex : incidentVertexIndexes) {
         double weight = weights.get(incidentVertexIndex.get()) * teleportationProbability;
-        System.out.println(vertexIndex.get() + "," + incidentVertexIndex.get() + ": " + weight);
+        //System.out.println(vertexIndex.get() + "," + incidentVertexIndex.get() + ": " + weight);
         vector.set(incidentVertexIndex.get(), weight);
       }
       ctx.write(vertexIndex, new VectorWritable(vector));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java Sun Jul 17 15:06:01 2011
@@ -79,7 +79,7 @@ public class Edge implements WritableCom
 
   @Override
   public String toString() {
-    return "(" + start.getId() + "," + end.getId() + ")";
+    return "(" + start.getId() + ',' + end.getId() + ')';
   }
 
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java Sun Jul 17 15:06:01 2011
@@ -112,7 +112,7 @@ public class EnumerateTrianglesJob exten
         while (bufferedVertexIdsIterator.hasNext()) {
           Vertex secondVertexOfMissingEdge = new Vertex(bufferedVertexIdsIterator.nextLong());
           UndirectedEdge missingEdge = new UndirectedEdge(firstVertexOfMissingEdge, secondVertexOfMissingEdge);
-          System.out.println(new JoinableUndirectedEdge(missingEdge, false) + " " + new VertexOrMarker(vertex));
+          //System.out.println(new JoinableUndirectedEdge(missingEdge, false) + " " + new VertexOrMarker(vertex));
           ctx.write(new JoinableUndirectedEdge(missingEdge, false), new VertexOrMarker(vertex));
         }
         bufferedVertexIDs.add(firstVertexOfMissingEdge.getId());

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java Sun Jul 17 15:06:01 2011
@@ -43,7 +43,7 @@ public class VertexOrMarker implements W
   }
 
   private VertexOrMarker(boolean marker) {
-    this.marker = true;
+    this.marker = marker;
   }
 
   public boolean isMarker() {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java Sun Jul 17 15:06:01 2011
@@ -72,7 +72,7 @@ import org.apache.mahout.math.hadoop.sim
  * <p>Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other
  * arguments.</p>
  */
-public class RowSimilarityJob extends AbstractJob {
+public final class RowSimilarityJob extends AbstractJob {
 
   public static final String DISTRIBUTED_SIMILARITY_CLASSNAME =
       RowSimilarityJob.class.getName() + ".distributedSimilarityClassname";

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.math.hadoop.similarity;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,104 +15,90 @@ package org.apache.mahout.math.hadoop.si
  * limitations under the License.
  */
 
+package org.apache.mahout.math.hadoop.similarity;
 
 import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
 import org.apache.mahout.clustering.canopy.Canopy;
 import org.apache.mahout.clustering.kmeans.Cluster;
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
 import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 
-/**
- *
- *
- **/
-class SeedVectorUtil {
-  private transient static Logger log = LoggerFactory.getLogger(SeedVectorUtil.class);
+final class SeedVectorUtil {
 
-  private SeedVectorUtil() {
+  private static final Logger log = LoggerFactory.getLogger(SeedVectorUtil.class);
 
+  private SeedVectorUtil() {
   }
 
-  public static void loadSeedVectors(Configuration conf, List<NamedVector> seedVectors) throws IOException {
+  public static List<NamedVector> loadSeedVectors(Configuration conf) {
 
     String seedPathStr = conf.get(VectorDistanceSimilarityJob.SEEDS_PATH_KEY);
-    if (seedPathStr != null && seedPathStr.length() > 0) {
-
-      Path thePath = new Path(seedPathStr, "*");
-      Collection<Path> result = Lists.newArrayList();
-
-      // get all filtered file names in result list
-      FileSystem fs = thePath.getFileSystem(conf);
-      FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(thePath, PathFilters.partFilter())),
-              PathFilters.partFilter());
-
-      for (FileStatus match : matches) {
-        result.add(fs.makeQualified(match.getPath()));
-      }
+    if (seedPathStr == null || seedPathStr.length() <= 0) {
+      return Collections.emptyList();
+    }
 
-      long item = 0;
-      for (Path seedPath : result) {
-        for (Writable value : new SequenceFileValueIterable<Writable>(seedPath, conf)) {
-          Class<? extends Writable> valueClass = value.getClass();
-          if (valueClass.equals(Cluster.class)) {
-            // get the cluster info
-            Cluster cluster = (Cluster) value;
-            Vector vector = cluster.getCenter();
-            if (vector instanceof NamedVector) {
-              seedVectors.add((NamedVector) vector);
-            } else {
-              seedVectors.add(new NamedVector(vector, cluster.getIdentifier()));
-            }
-          } else if (valueClass.equals(Canopy.class)) {
-            // get the cluster info
-            Canopy canopy = (Canopy) value;
-            Vector vector = canopy.getCenter();
-            if (vector instanceof NamedVector) {
-              seedVectors.add((NamedVector) vector);
-            } else {
-              seedVectors.add(new NamedVector(vector, canopy.getIdentifier()));
-            }
-          } else if (valueClass.equals(Vector.class)) {
-            Vector vector = (Vector) value;
-            if (vector instanceof NamedVector) {
-              seedVectors.add((NamedVector) vector);
-            } else {
-              seedVectors.add(new NamedVector(vector, seedPath + "." + item++));
-            }
-          } else if (valueClass.equals(VectorWritable.class) || valueClass.isInstance(VectorWritable.class)) {
-            VectorWritable vw = (VectorWritable) value;
-            Vector vector = vw.get();
-            if (vector instanceof NamedVector) {
-              seedVectors.add((NamedVector) vector);
-            } else {
-              seedVectors.add(new NamedVector(vector, seedPath + "." + item++));
-            }
-          } else {
-            throw new IllegalStateException("Bad value class: " + valueClass);
-          }
+    List<NamedVector> seedVectors = Lists.newArrayList();
+    long item = 0;
+    for (Writable value :
+         new SequenceFileDirValueIterable<Writable>(new Path(seedPathStr),
+                                                    PathType.LIST,
+                                                    PathFilters.partFilter(),
+                                                    conf)) {
+      Class<? extends Writable> valueClass = value.getClass();
+      if (valueClass.equals(Cluster.class)) {
+        // get the cluster info
+        Cluster cluster = (Cluster) value;
+        Vector vector = cluster.getCenter();
+        if (vector instanceof NamedVector) {
+          seedVectors.add((NamedVector) vector);
+        } else {
+          seedVectors.add(new NamedVector(vector, cluster.getIdentifier()));
+        }
+      } else if (valueClass.equals(Canopy.class)) {
+        // get the cluster info
+        Canopy canopy = (Canopy) value;
+        Vector vector = canopy.getCenter();
+        if (vector instanceof NamedVector) {
+          seedVectors.add((NamedVector) vector);
+        } else {
+          seedVectors.add(new NamedVector(vector, canopy.getIdentifier()));
+        }
+      } else if (valueClass.equals(Vector.class)) {
+        Vector vector = (Vector) value;
+        if (vector instanceof NamedVector) {
+          seedVectors.add((NamedVector) vector);
+        } else {
+          seedVectors.add(new NamedVector(vector, seedPathStr + '.' + item++));
+        }
+      } else if (valueClass.equals(VectorWritable.class) || valueClass.isInstance(VectorWritable.class)) {
+        VectorWritable vw = (VectorWritable) value;
+        Vector vector = vw.get();
+        if (vector instanceof NamedVector) {
+          seedVectors.add((NamedVector) vector);
+        } else {
+          seedVectors.add(new NamedVector(vector, seedPathStr + '.' + item++));
         }
-      }
-      if (seedVectors.isEmpty()) {
-        throw new IllegalStateException("No seeds found. Check your path: " + seedPathStr);
       } else {
-        log.info("Seed Vectors size: " + seedVectors.size());
+        throw new IllegalStateException("Bad value class: " + valueClass);
       }
     }
+    if (seedVectors.isEmpty()) {
+      throw new IllegalStateException("No seeds found. Check your path: " + seedPathStr);
+    }
+    log.info("Seed Vectors size: {}", seedVectors.size());
+    return seedVectors;
   }
 
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.math.hadoop.similarity;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.math.hadoop.si
  * limitations under the License.
  */
 
+package org.apache.mahout.math.hadoop.similarity;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
@@ -26,24 +26,23 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.List;
 
 /**
- * Similar to {@link org.apache.mahout.math.hadoop.similarity.VectorDistanceMapper}, except it outputs
+ * Similar to {@link VectorDistanceMapper}, except it outputs
  * &lt;input, Vector&gt;, where the vector is a dense vector contain one entry for every seed vector
  */
-public class VectorDistanceInvertedMapper extends Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
-  private transient static Logger log = LoggerFactory.getLogger(VectorDistanceInvertedMapper.class);
-  protected DistanceMeasure measure;
-  protected List<NamedVector> seedVectors;
+public final class VectorDistanceInvertedMapper
+    extends Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
+
+  private DistanceMeasure measure;
+  private List<NamedVector> seedVectors;
 
   @Override
-  protected void map(WritableComparable<?> key, VectorWritable value, Context context) throws IOException, InterruptedException {
+  protected void map(WritableComparable<?> key, VectorWritable value, Context context)
+    throws IOException, InterruptedException {
     String keyName;
     Vector valVec = value.get();
     if (valVec instanceof NamedVector) {
@@ -68,8 +67,7 @@ public class VectorDistanceInvertedMappe
       measure = ccl.loadClass(conf.get(VectorDistanceSimilarityJob.DISTANCE_MEASURE_KEY))
               .asSubclass(DistanceMeasure.class).newInstance();
       measure.configure(conf);
-      seedVectors = new ArrayList<NamedVector>(1000);
-      SeedVectorUtil.loadSeedVectors(conf, seedVectors);
+      seedVectors = SeedVectorUtil.loadSeedVectors(conf);
     } catch (InstantiationException e) {
       throw new IllegalStateException(e);
     } catch (IllegalAccessException e) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.math.hadoop.similarity;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.math.hadoop.si
  * limitations under the License.
  */
 
+package org.apache.mahout.math.hadoop.similarity;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DoubleWritable;
@@ -26,24 +26,19 @@ import org.apache.mahout.common.distance
 import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.List;
 
-/**
- *
- *
- **/
-public class VectorDistanceMapper extends Mapper<WritableComparable<?>, VectorWritable, StringTuple, DoubleWritable> {
-  private transient static Logger log = LoggerFactory.getLogger(VectorDistanceMapper.class);
-  protected DistanceMeasure measure;
-  protected List<NamedVector> seedVectors;
+public final class VectorDistanceMapper
+    extends Mapper<WritableComparable<?>, VectorWritable, StringTuple, DoubleWritable> {
+
+  private DistanceMeasure measure;
+  private List<NamedVector> seedVectors;
 
   @Override
-  protected void map(WritableComparable<?> key, VectorWritable value, Context context) throws IOException, InterruptedException {
+  protected void map(WritableComparable<?> key, VectorWritable value, Context context)
+    throws IOException, InterruptedException {
     String keyName;
     Vector valVec = value.get();
     if (valVec instanceof NamedVector) {
@@ -69,8 +64,7 @@ public class VectorDistanceMapper extend
       measure = ccl.loadClass(conf.get(VectorDistanceSimilarityJob.DISTANCE_MEASURE_KEY))
               .asSubclass(DistanceMeasure.class).newInstance();
       measure.configure(conf);
-      seedVectors = new ArrayList<NamedVector>(1000);
-      SeedVectorUtil.loadSeedVectors(conf, seedVectors);
+      seedVectors = SeedVectorUtil.loadSeedVectors(conf);
     } catch (InstantiationException e) {
       throw new IllegalStateException(e);
     } catch (IllegalAccessException e) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.math.hadoop.similarity;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.math.hadoop.si
  * limitations under the License.
  */
 
+package org.apache.mahout.math.hadoop.similarity;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -34,8 +34,6 @@ import org.apache.mahout.common.commandl
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
 import org.apache.mahout.math.VectorWritable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 
@@ -44,7 +42,7 @@ import java.io.IOException;
  * and emits the a tuple of seed id, other id, distance.  It is a more generic version of KMean's mapper
  */
 public class VectorDistanceSimilarityJob extends AbstractJob {
-  private static final Logger log = LoggerFactory.getLogger(VectorDistanceSimilarityJob.class);
+
   public static final String SEEDS = "seeds";
   public static final String SEEDS_PATH_KEY = "seedsPath";
   public static final String DISTANCE_MEASURE_KEY = "vectorDistSim.measure";
@@ -62,7 +60,10 @@ public class VectorDistanceSimilarityJob
     addOption(DefaultOptionCreator.distanceMeasureOption().create());
     addOption(SEEDS, "s", "The set of vectors to compute distances against.  Must fit in memory on the mapper");
     addOption(DefaultOptionCreator.overwriteOption().create());
-    addOption(OUT_TYPE_KEY, "ot", "[pw|v] -- Define the output style: pairwise, the default, (pw) or vector (v).  Pairwise is a tuple of <seed, other, distance>, vector is <other, <Vector of size the number of seeds>>.", "pw");
+    addOption(OUT_TYPE_KEY, "ot",
+              "[pw|v] -- Define the output style: pairwise, the default, (pw) or vector (v).  Pairwise is a "
+                  + "tuple of <seed, other, distance>, vector is <other, <Vector of size the number of seeds>>.",
+              "pw");
     if (parseArguments(args) == null) {
       return -1;
     }
@@ -95,26 +96,27 @@ public class VectorDistanceSimilarityJob
                          Path input,
                          Path seeds,
                          Path output,
-                         DistanceMeasure measure, String outType) throws IOException, ClassNotFoundException, InterruptedException {
+                         DistanceMeasure measure, String outType)
+    throws IOException, ClassNotFoundException, InterruptedException {
     conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName());
     conf.set(SEEDS_PATH_KEY, seeds.toString());
     Job job = new Job(conf, "Vector Distance Similarity: seeds: " + seeds + " input: " + input);
     job.setInputFormatClass(SequenceFileInputFormat.class);
     job.setOutputFormatClass(SequenceFileOutputFormat.class);
-    if (outType.equalsIgnoreCase("pw")) {
+    if ("pw".equalsIgnoreCase(outType)) {
       job.setMapOutputKeyClass(StringTuple.class);
       job.setOutputKeyClass(StringTuple.class);
       job.setMapOutputValueClass(DoubleWritable.class);
       job.setOutputValueClass(DoubleWritable.class);
       job.setMapperClass(VectorDistanceMapper.class);
-    } else if (outType.equalsIgnoreCase("v")) {
+    } else if ("v".equalsIgnoreCase(outType)) {
       job.setMapOutputKeyClass(Text.class);
       job.setOutputKeyClass(Text.class);
       job.setMapOutputValueClass(VectorWritable.class);
       job.setOutputValueClass(VectorWritable.class);
       job.setMapperClass(VectorDistanceInvertedMapper.class);
     } else {
-      throw new InterruptedException("Invalid outType specified: " + outType);
+      throw new IllegalArgumentException("Invalid outType specified: " + outType);
     }
 
 
@@ -125,7 +127,7 @@ public class VectorDistanceSimilarityJob
     job.setJarByClass(VectorDistanceSimilarityJob.class);
     HadoopUtil.delete(conf, output);
     if (!job.waitForCompletion(true)) {
-      throw new InterruptedException("VectorDistance Similarity failed processing " + seeds);
+      throw new IllegalStateException("VectorDistance Similarity failed processing " + seeds);
     }
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java Sun Jul 17 15:06:01 2011
@@ -27,7 +27,7 @@ import org.apache.mahout.math.Varint;
 /**
  * an entry in a row vector stored together with a precomputed weight of the row
  */
-class WeightedOccurrence implements Writable, Cloneable {
+final class WeightedOccurrence implements Writable, Cloneable {
 
   private int row;
   private double value;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java Sun Jul 17 15:06:01 2011
@@ -24,7 +24,7 @@ import org.apache.hadoop.io.ArrayWritabl
 /**
  * an array of {@link WeightedOccurrence}s
  */
-class WeightedOccurrenceArray extends ArrayWritable {
+final class WeightedOccurrenceArray extends ArrayWritable {
 
   WeightedOccurrenceArray() {
     super(WeightedOccurrence.class);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java Sun Jul 17 15:06:01 2011
@@ -68,7 +68,7 @@ public final class TrainAdaptiveLogistic
 
       CsvRecordFactory csv = lmp.getCsvRecordFactory();
       model = lmp.createAdaptiveLogisticRegression();
-      State<Wrapper, CrossFoldLearner> best = null;
+      State<Wrapper, CrossFoldLearner> best;
       CrossFoldLearner learner = null;
 
       int k = 0;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Sun Jul 17 15:06:01 2011
@@ -53,7 +53,7 @@ public final class Job extends AbstractJ
       Path output = new Path("output");
       Configuration conf = new Configuration();
       HadoopUtil.delete(conf, output);
-      new Job().run(conf, new Path("testdata"), output,
+      run(conf, new Path("testdata"), output,
           new EuclideanDistanceMeasure(), new TriangularKernelProfile(), 47.6,
           1, 0.5, 10);
     }
@@ -130,10 +130,16 @@ public final class Job extends AbstractJ
    * @param maxIterations
    *          the int maximum number of iterations
    */
-  public void run(Configuration conf, Path input, Path output,
-      DistanceMeasure measure, IKernelProfile kernelProfile, double t1,
-      double t2, double convergenceDelta, int maxIterations)
-      throws IOException, InterruptedException, ClassNotFoundException {
+  public static void run(Configuration conf,
+                         Path input,
+                         Path output,
+                         DistanceMeasure measure,
+                         IKernelProfile kernelProfile,
+                         double t1,
+                         double t2,
+                         double convergenceDelta,
+                         int maxIterations)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Path directoryContainingConvertedInput = new Path(output,
         DIRECTORY_CONTAINING_CONVERTED_INPUT);
     InputDriver.runJob(input, directoryContainingConvertedInput);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Sun Jul 17 15:06:01 2011
@@ -152,8 +152,6 @@ public final class VectorDumper {
             }
             writer.write('\n');
           }
-          long i = 0;
-          long count = 0;
           long numItems = Long.MAX_VALUE;
           if (cmdLine.hasOption(numItemsOpt)) {
             numItems = Long.parseLong(cmdLine.getValue(numItemsOpt).toString());
@@ -161,6 +159,8 @@ public final class VectorDumper {
           }
           SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<Writable, Writable>(path, true, conf);
           Iterator<Pair<Writable,Writable>> iterator = iterable.iterator();
+          long i = 0;
+          long count = 0;
           while (iterator.hasNext() && count < numItems) {
             Pair<Writable, Writable> record = iterator.next();
             Writable keyWritable = record.getFirst();