You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/07/17 17:06:03 UTC
svn commit: r1147646 - in /mahout/trunk:
core/src/main/java/org/apache/mahout/classifier/naivebayes/
core/src/main/java/org/apache/mahout/classifier/naivebayes/training/
core/src/main/java/org/apache/mahout/clustering/
core/src/main/java/org/apache/mah...
Author: srowen
Date: Sun Jul 17 15:06:01 2011
New Revision: 1147646
URL: http://svn.apache.org/viewvc?rev=1147646&view=rev
Log:
Style changes on MAHOUT-763 and new PageRank code; mostly copyright headers and simpler iteration over directories of sequence files
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java
mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java Sun Jul 17 15:06:01 2011
@@ -34,16 +34,19 @@ import java.io.IOException;
/** NaiveBayesModel holds the weight Matrix, the feature and label sums and the weight normalizer vectors.*/
public class NaiveBayesModel {
- private Vector weightsPerLabel;
- private Vector perlabelThetaNormalizer;
- private Vector weightsPerFeature;
- private Matrix weightsPerLabelAndFeature;
- private float alphaI;
- private double numFeatures;
- private double totalWeightSum;
-
- public NaiveBayesModel(Matrix weightMatrix, Vector weightsPerFeature, Vector weightsPerLabel, Vector thetaNormalizer,
- float alphaI) {
+ private final Vector weightsPerLabel;
+ private final Vector perlabelThetaNormalizer;
+ private final Vector weightsPerFeature;
+ private final Matrix weightsPerLabelAndFeature;
+ private final float alphaI;
+ private final double numFeatures;
+ private final double totalWeightSum;
+
+ public NaiveBayesModel(Matrix weightMatrix,
+ Vector weightsPerFeature,
+ Vector weightsPerLabel,
+ Vector thetaNormalizer,
+ float alphaI) {
this.weightsPerLabelAndFeature = weightMatrix;
this.weightsPerFeature = weightsPerFeature;
this.weightsPerLabel = weightsPerLabel;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/AbstractThetaTrainer.java Sun Jul 17 15:06:01 2011
@@ -22,12 +22,12 @@ import org.apache.mahout.math.Vector;
public abstract class AbstractThetaTrainer {
- private Vector weightsPerFeature;
- private Vector weightsPerLabel;
- private Vector perLabelThetaNormalizer;
- private double alphaI;
- private double totalWeightSum;
- private double numFeatures;
+ private final Vector weightsPerFeature;
+ private final Vector weightsPerLabel;
+ private final Vector perLabelThetaNormalizer;
+ private final double alphaI;
+ private final double totalWeightSum;
+ private final double numFeatures;
public AbstractThetaTrainer(Vector weightsPerFeature, Vector weightsPerLabel, double alphaI) {
Preconditions.checkNotNull(weightsPerFeature);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainUtils.java Sun Jul 17 15:06:01 2011
@@ -50,15 +50,11 @@ public class TrainUtils {
static NaiveBayesModel readModelFromTempDir(Path base, Configuration conf) {
- Vector scoresPerLabel = null;
- Vector perlabelThetaNormalizer = null;
- Vector scoresPerFeature = null;
- Matrix scoresPerLabelAndFeature;
- float alphaI;
-
- alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);
+ float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);
// read feature sums and label sums
+ Vector scoresPerLabel = null;
+ Vector scoresPerFeature = null;
for (Pair<Text,VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
String key = record.getFirst().toString();
@@ -73,12 +69,13 @@ public class TrainUtils {
Preconditions.checkNotNull(scoresPerFeature);
Preconditions.checkNotNull(scoresPerLabel);
- scoresPerLabelAndFeature = new SparseMatrix(new int[] { scoresPerLabel.size(), scoresPerFeature.size() });
+ Matrix scoresPerLabelAndFeature = new SparseMatrix(new int[]{scoresPerLabel.size(), scoresPerFeature.size()});
for (Pair<IntWritable,VectorWritable> entry : new SequenceFileDirIterable<IntWritable,VectorWritable>(
new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(), conf)) {
scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
}
+ Vector perlabelThetaNormalizer = null;
for (Pair<Text,VectorWritable> entry : new SequenceFileDirIterable<Text,VectorWritable>(
new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) {
if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedPropertyVectorWritable.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.clustering;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.clustering;
* limitations under the License.
*/
+package org.apache.mahout.clustering;
import org.apache.hadoop.io.Text;
import org.apache.mahout.math.Vector;
@@ -26,13 +26,9 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
-/**
- *
- *
- **/
public class WeightedPropertyVectorWritable extends WeightedVectorWritable {
- protected Map<Text, Text> properties;
+ private Map<Text, Text> properties;
public WeightedPropertyVectorWritable() {
}
@@ -82,13 +78,13 @@ public class WeightedPropertyVectorWrita
public String toString() {
Vector vector = getVector();
- StringBuilder bldr = new StringBuilder("wt: ").append(weight);
- if (properties != null && properties.isEmpty() == false) {
+ StringBuilder bldr = new StringBuilder("wt: ").append(getWeight());
+ if (properties != null && !properties.isEmpty()) {
for (Map.Entry<Text, Text> entry : properties.entrySet()) {
bldr.append(entry.getKey().toString()).append(": ").append(entry.getValue().toString()).append(' ');
}
}
- bldr.append(" vec: ").append((vector == null ? "null" : AbstractCluster.formatVector(vector, null)));
+ bldr.append(" vec: ").append(vector == null ? "null" : AbstractCluster.formatVector(vector, null));
return bldr.toString();
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/WeightedVectorWritable.java Sun Jul 17 15:06:01 2011
@@ -27,8 +27,8 @@ import org.apache.mahout.math.VectorWrit
public class WeightedVectorWritable implements Writable {
- protected VectorWritable vectorWritable = new VectorWritable();
- protected double weight;
+ private final VectorWritable vectorWritable = new VectorWritable();
+ private double weight;
public WeightedVectorWritable() {
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Sun Jul 17 15:06:01 2011
@@ -17,20 +17,16 @@
package org.apache.mahout.clustering.fuzzykmeans;
-import java.io.IOException;
import java.util.Collection;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.clustering.kmeans.Cluster;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
final class FuzzyKMeansUtil {
@@ -38,40 +34,26 @@ final class FuzzyKMeansUtil {
}
/** Configure the mapper with the cluster info */
- public static void configureWithClusterInfo(Path clusterPathStr, Collection<SoftCluster> clusters)
- throws IOException {
- // Get the path location where the cluster Info is stored
- Configuration conf = new Configuration();
- Path clusterPath = new Path(clusterPathStr, "*");
- Collection<Path> result = Lists.newArrayList();
-
- // get all filtered file names in result list
- FileSystem fs = clusterPath.getFileSystem(conf);
- FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(clusterPath, PathFilters.partFilter())),
- PathFilters.partFilter());
-
- for (FileStatus match : matches) {
- result.add(fs.makeQualified(match.getPath()));
- }
-
- // iterate through the result path list
- for (Path path : result) {
- for (Writable value : new SequenceFileValueIterable<Writable>(path, conf)) {
- Class<? extends Writable> valueClass = value.getClass();
- if (valueClass.equals(Cluster.class)) {
- // get the cluster info
- Cluster cluster = (Cluster) value;
- clusters.add(new SoftCluster(cluster.getCenter(), cluster.getId(), cluster.getMeasure()));
- } else if (valueClass.equals(SoftCluster.class)) {
- // get the cluster info
- clusters.add((SoftCluster) value);
- } else if (valueClass.equals(Canopy.class)) {
- // get the cluster info
- Canopy canopy = (Canopy) value;
- clusters.add(new SoftCluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
- } else {
- throw new IllegalStateException("Bad value class: " + valueClass);
- }
+ public static void configureWithClusterInfo(Path clusterPath, Collection<SoftCluster> clusters) {
+ for (Writable value :
+ new SequenceFileDirValueIterable<Writable>(clusterPath,
+ PathType.LIST,
+ PathFilters.partFilter(),
+ new Configuration())) {
+ Class<? extends Writable> valueClass = value.getClass();
+ if (valueClass.equals(Cluster.class)) {
+ // get the cluster info
+ Cluster cluster = (Cluster) value;
+ clusters.add(new SoftCluster(cluster.getCenter(), cluster.getId(), cluster.getMeasure()));
+ } else if (valueClass.equals(SoftCluster.class)) {
+ // get the cluster info
+ clusters.add((SoftCluster) value);
+ } else if (valueClass.equals(Canopy.class)) {
+ // get the cluster info
+ Canopy canopy = (Canopy) value;
+ clusters.add(new SoftCluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
+ } else {
+ throw new IllegalStateException("Bad value class: " + valueClass);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Sun Jul 17 15:06:01 2011
@@ -27,7 +27,6 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.clustering.WeightedPropertyVectorWritable;
-import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.VectorWritable;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java Sun Jul 17 15:06:01 2011
@@ -22,11 +22,9 @@ import java.util.List;
import java.util.Map;
import com.google.common.collect.Lists;
-import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.Writer;
-import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.ClusterObservations;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Sun Jul 17 15:06:01 2011
@@ -17,19 +17,15 @@
package org.apache.mahout.clustering.kmeans;
-import java.io.IOException;
import java.util.Collection;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
final class KMeansUtil {
@@ -38,36 +34,20 @@ final class KMeansUtil {
/** Configure the mapper with the cluster info */
public static void configureWithClusterInfo(Configuration conf,
- Path clusterPathStr,
- Collection<Cluster> clusters) throws IOException {
-
- // Get the path location where the cluster Info is stored
- Path clusterPath = new Path(clusterPathStr, "*");
- Collection<Path> result = Lists.newArrayList();
-
- // get all filtered file names in result list
- FileSystem fs = clusterPath.getFileSystem(conf);
- FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(clusterPath, PathFilters.partFilter())),
- PathFilters.partFilter());
-
- for (FileStatus match : matches) {
- result.add(fs.makeQualified(match.getPath()));
- }
-
- // iterate through the result path list
- for (Path path : result) {
- for (Writable value : new SequenceFileValueIterable<Writable>(path, conf)) {
- Class<? extends Writable> valueClass = value.getClass();
- if (valueClass.equals(Cluster.class)) {
- // get the cluster info
- clusters.add((Cluster) value);
- } else if (valueClass.equals(Canopy.class)) {
- // get the cluster info
- Canopy canopy = (Canopy) value;
- clusters.add(new Cluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
- } else {
- throw new IllegalStateException("Bad value class: " + valueClass);
- }
+ Path clusterPath,
+ Collection<Cluster> clusters) {
+ for (Writable value :
+ new SequenceFileDirValueIterable<Writable>(clusterPath, PathType.LIST, PathFilters.partFilter(), conf)) {
+ Class<? extends Writable> valueClass = value.getClass();
+ if (valueClass.equals(Cluster.class)) {
+ // get the cluster info
+ clusters.add((Cluster) value);
+ } else if (valueClass.equals(Canopy.class)) {
+ // get the cluster info
+ Canopy canopy = (Canopy) value;
+ clusters.add(new Cluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
+ } else {
+ throw new IllegalStateException("Bad value class: " + valueClass);
}
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java Sun Jul 17 15:06:01 2011
@@ -22,7 +22,6 @@ import com.google.common.base.Splitter;
import java.util.List;
import java.util.Locale;
-import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import org.apache.mahout.df.data.Dataset.Attribute;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/GraphUtils.java Sun Jul 17 15:06:01 2011
@@ -31,7 +31,7 @@ import java.io.IOException;
import java.io.InputStream;
/** helper method for working with graphs */
-public class GraphUtils {
+public final class GraphUtils {
private GraphUtils() {}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/linkanalysis/PageRankJob.java Sun Jul 17 15:06:01 2011
@@ -62,7 +62,7 @@ import org.apache.mahout.math.map.OpenLo
*
* <p>The input files need to be a {@link org.apache.hadoop.io.SequenceFile} with {@link Edge}s as keys and
* any Writable as values and another {@link org.apache.hadoop.io.SequenceFile} with {@link IntWritable}s as keys and {@link Vertex} as
- * values, as produced by {@link org.apache.mahout.graph.common.GraphUtils.indexVertices())}</p>
+ * values, as produced by {@link org.apache.mahout.graph.common.GraphUtils#indexVertices(Configuration, Path, Path)}</p>
*
* <p>This job outputs text files with a vertex id and its pagerank per line.</p>
*
@@ -105,7 +105,7 @@ public class PageRankJob extends Abstrac
addOption("numIterations", "it", "number of numIterations", String.valueOf(5));
addOption("teleportationProbability", "tp", "probability to teleport to a random vertex", String.valueOf(0.8));
- Map<String, String> parsedArgs = super.parseArguments(args);
+ Map<String, String> parsedArgs = parseArguments(args);
Path vertexIndex = new Path(parsedArgs.get("--vertexIndex"));
Path edges = new Path(parsedArgs.get("--edges"));
@@ -116,7 +116,7 @@ public class PageRankJob extends Abstrac
Preconditions.checkArgument(numVertices > 0);
Preconditions.checkArgument(numIterations > 0);
- Preconditions.checkArgument(teleportationProbability > 0 && teleportationProbability < +1);
+ Preconditions.checkArgument(teleportationProbability > 0.0 && teleportationProbability < 1.0);
Job indexedDegrees = prepareJob(edges, getTempPath(TMP_INDEXED_DEGREES), SequenceFileInputFormat.class,
IndexAndCountDegreeMapper.class, IntWritable.class, IntWritable.class, IntSumReducer.class, IntWritable.class,
@@ -245,7 +245,7 @@ public class PageRankJob extends Abstrac
Vector vector = new RandomAccessSparseVector(numVertices);
for (IntWritable incidentVertexIndex : incidentVertexIndexes) {
double weight = weights.get(incidentVertexIndex.get()) * teleportationProbability;
- System.out.println(vertexIndex.get() + "," + incidentVertexIndex.get() + ": " + weight);
+ //System.out.println(vertexIndex.get() + "," + incidentVertexIndex.get() + ": " + weight);
vector.set(incidentVertexIndex.get(), weight);
}
ctx.write(vertexIndex, new VectorWritable(vector));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Edge.java Sun Jul 17 15:06:01 2011
@@ -79,7 +79,7 @@ public class Edge implements WritableCom
@Override
public String toString() {
- return "(" + start.getId() + "," + end.getId() + ")";
+ return "(" + start.getId() + ',' + end.getId() + ')';
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/EnumerateTrianglesJob.java Sun Jul 17 15:06:01 2011
@@ -112,7 +112,7 @@ public class EnumerateTrianglesJob exten
while (bufferedVertexIdsIterator.hasNext()) {
Vertex secondVertexOfMissingEdge = new Vertex(bufferedVertexIdsIterator.nextLong());
UndirectedEdge missingEdge = new UndirectedEdge(firstVertexOfMissingEdge, secondVertexOfMissingEdge);
- System.out.println(new JoinableUndirectedEdge(missingEdge, false) + " " + new VertexOrMarker(vertex));
+ //System.out.println(new JoinableUndirectedEdge(missingEdge, false) + " " + new VertexOrMarker(vertex));
ctx.write(new JoinableUndirectedEdge(missingEdge, false), new VertexOrMarker(vertex));
}
bufferedVertexIDs.add(firstVertexOfMissingEdge.getId());
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/VertexOrMarker.java Sun Jul 17 15:06:01 2011
@@ -43,7 +43,7 @@ public class VertexOrMarker implements W
}
private VertexOrMarker(boolean marker) {
- this.marker = true;
+ this.marker = marker;
}
public boolean isMarker() {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java Sun Jul 17 15:06:01 2011
@@ -72,7 +72,7 @@ import org.apache.mahout.math.hadoop.sim
* <p>Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other
* arguments.</p>
*/
-public class RowSimilarityJob extends AbstractJob {
+public final class RowSimilarityJob extends AbstractJob {
public static final String DISTRIBUTED_SIMILARITY_CLASSNAME =
RowSimilarityJob.class.getName() + ".distributedSimilarityClassname";
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SeedVectorUtil.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.math.hadoop.similarity;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,104 +15,90 @@ package org.apache.mahout.math.hadoop.si
* limitations under the License.
*/
+package org.apache.mahout.math.hadoop.similarity;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.clustering.kmeans.Cluster;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.Collection;
+import java.util.Collections;
import java.util.List;
-/**
- *
- *
- **/
-class SeedVectorUtil {
- private transient static Logger log = LoggerFactory.getLogger(SeedVectorUtil.class);
+final class SeedVectorUtil {
- private SeedVectorUtil() {
+ private static final Logger log = LoggerFactory.getLogger(SeedVectorUtil.class);
+ private SeedVectorUtil() {
}
- public static void loadSeedVectors(Configuration conf, List<NamedVector> seedVectors) throws IOException {
+ public static List<NamedVector> loadSeedVectors(Configuration conf) {
String seedPathStr = conf.get(VectorDistanceSimilarityJob.SEEDS_PATH_KEY);
- if (seedPathStr != null && seedPathStr.length() > 0) {
-
- Path thePath = new Path(seedPathStr, "*");
- Collection<Path> result = Lists.newArrayList();
-
- // get all filtered file names in result list
- FileSystem fs = thePath.getFileSystem(conf);
- FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(thePath, PathFilters.partFilter())),
- PathFilters.partFilter());
-
- for (FileStatus match : matches) {
- result.add(fs.makeQualified(match.getPath()));
- }
+ if (seedPathStr == null || seedPathStr.length() <= 0) {
+ return Collections.emptyList();
+ }
- long item = 0;
- for (Path seedPath : result) {
- for (Writable value : new SequenceFileValueIterable<Writable>(seedPath, conf)) {
- Class<? extends Writable> valueClass = value.getClass();
- if (valueClass.equals(Cluster.class)) {
- // get the cluster info
- Cluster cluster = (Cluster) value;
- Vector vector = cluster.getCenter();
- if (vector instanceof NamedVector) {
- seedVectors.add((NamedVector) vector);
- } else {
- seedVectors.add(new NamedVector(vector, cluster.getIdentifier()));
- }
- } else if (valueClass.equals(Canopy.class)) {
- // get the cluster info
- Canopy canopy = (Canopy) value;
- Vector vector = canopy.getCenter();
- if (vector instanceof NamedVector) {
- seedVectors.add((NamedVector) vector);
- } else {
- seedVectors.add(new NamedVector(vector, canopy.getIdentifier()));
- }
- } else if (valueClass.equals(Vector.class)) {
- Vector vector = (Vector) value;
- if (vector instanceof NamedVector) {
- seedVectors.add((NamedVector) vector);
- } else {
- seedVectors.add(new NamedVector(vector, seedPath + "." + item++));
- }
- } else if (valueClass.equals(VectorWritable.class) || valueClass.isInstance(VectorWritable.class)) {
- VectorWritable vw = (VectorWritable) value;
- Vector vector = vw.get();
- if (vector instanceof NamedVector) {
- seedVectors.add((NamedVector) vector);
- } else {
- seedVectors.add(new NamedVector(vector, seedPath + "." + item++));
- }
- } else {
- throw new IllegalStateException("Bad value class: " + valueClass);
- }
+ List<NamedVector> seedVectors = Lists.newArrayList();
+ long item = 0;
+ for (Writable value :
+ new SequenceFileDirValueIterable<Writable>(new Path(seedPathStr),
+ PathType.LIST,
+ PathFilters.partFilter(),
+ conf)) {
+ Class<? extends Writable> valueClass = value.getClass();
+ if (valueClass.equals(Cluster.class)) {
+ // get the cluster info
+ Cluster cluster = (Cluster) value;
+ Vector vector = cluster.getCenter();
+ if (vector instanceof NamedVector) {
+ seedVectors.add((NamedVector) vector);
+ } else {
+ seedVectors.add(new NamedVector(vector, cluster.getIdentifier()));
+ }
+ } else if (valueClass.equals(Canopy.class)) {
+ // get the cluster info
+ Canopy canopy = (Canopy) value;
+ Vector vector = canopy.getCenter();
+ if (vector instanceof NamedVector) {
+ seedVectors.add((NamedVector) vector);
+ } else {
+ seedVectors.add(new NamedVector(vector, canopy.getIdentifier()));
+ }
+ } else if (valueClass.equals(Vector.class)) {
+ Vector vector = (Vector) value;
+ if (vector instanceof NamedVector) {
+ seedVectors.add((NamedVector) vector);
+ } else {
+ seedVectors.add(new NamedVector(vector, seedPathStr + '.' + item++));
+ }
+ } else if (valueClass.equals(VectorWritable.class) || valueClass.isInstance(VectorWritable.class)) {
+ VectorWritable vw = (VectorWritable) value;
+ Vector vector = vw.get();
+ if (vector instanceof NamedVector) {
+ seedVectors.add((NamedVector) vector);
+ } else {
+ seedVectors.add(new NamedVector(vector, seedPathStr + '.' + item++));
}
- }
- if (seedVectors.isEmpty()) {
- throw new IllegalStateException("No seeds found. Check your path: " + seedPathStr);
} else {
- log.info("Seed Vectors size: " + seedVectors.size());
+ throw new IllegalStateException("Bad value class: " + valueClass);
}
}
+ if (seedVectors.isEmpty()) {
+ throw new IllegalStateException("No seeds found. Check your path: " + seedPathStr);
+ }
+ log.info("Seed Vectors size: {}", seedVectors.size());
+ return seedVectors;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceInvertedMapper.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.math.hadoop.similarity;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.math.hadoop.si
* limitations under the License.
*/
+package org.apache.mahout.math.hadoop.similarity;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
@@ -26,24 +26,23 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.List;
/**
- * Similar to {@link org.apache.mahout.math.hadoop.similarity.VectorDistanceMapper}, except it outputs
+ * Similar to {@link VectorDistanceMapper}, except it outputs
* <input, Vector>, where the vector is a dense vector containing one entry for every seed vector
*/
-public class VectorDistanceInvertedMapper extends Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
- private transient static Logger log = LoggerFactory.getLogger(VectorDistanceInvertedMapper.class);
- protected DistanceMeasure measure;
- protected List<NamedVector> seedVectors;
+public final class VectorDistanceInvertedMapper
+ extends Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
+
+ private DistanceMeasure measure;
+ private List<NamedVector> seedVectors;
@Override
- protected void map(WritableComparable<?> key, VectorWritable value, Context context) throws IOException, InterruptedException {
+ protected void map(WritableComparable<?> key, VectorWritable value, Context context)
+ throws IOException, InterruptedException {
String keyName;
Vector valVec = value.get();
if (valVec instanceof NamedVector) {
@@ -68,8 +67,7 @@ public class VectorDistanceInvertedMappe
measure = ccl.loadClass(conf.get(VectorDistanceSimilarityJob.DISTANCE_MEASURE_KEY))
.asSubclass(DistanceMeasure.class).newInstance();
measure.configure(conf);
- seedVectors = new ArrayList<NamedVector>(1000);
- SeedVectorUtil.loadSeedVectors(conf, seedVectors);
+ seedVectors = SeedVectorUtil.loadSeedVectors(conf);
} catch (InstantiationException e) {
throw new IllegalStateException(e);
} catch (IllegalAccessException e) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceMapper.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.math.hadoop.similarity;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.math.hadoop.si
* limitations under the License.
*/
+package org.apache.mahout.math.hadoop.similarity;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DoubleWritable;
@@ -26,24 +26,19 @@ import org.apache.mahout.common.distance
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.List;
-/**
- *
- *
- **/
-public class VectorDistanceMapper extends Mapper<WritableComparable<?>, VectorWritable, StringTuple, DoubleWritable> {
- private transient static Logger log = LoggerFactory.getLogger(VectorDistanceMapper.class);
- protected DistanceMeasure measure;
- protected List<NamedVector> seedVectors;
+public final class VectorDistanceMapper
+ extends Mapper<WritableComparable<?>, VectorWritable, StringTuple, DoubleWritable> {
+
+ private DistanceMeasure measure;
+ private List<NamedVector> seedVectors;
@Override
- protected void map(WritableComparable<?> key, VectorWritable value, Context context) throws IOException, InterruptedException {
+ protected void map(WritableComparable<?> key, VectorWritable value, Context context)
+ throws IOException, InterruptedException {
String keyName;
Vector valVec = value.get();
if (valVec instanceof NamedVector) {
@@ -69,8 +64,7 @@ public class VectorDistanceMapper extend
measure = ccl.loadClass(conf.get(VectorDistanceSimilarityJob.DISTANCE_MEASURE_KEY))
.asSubclass(DistanceMeasure.class).newInstance();
measure.configure(conf);
- seedVectors = new ArrayList<NamedVector>(1000);
- SeedVectorUtil.loadSeedVectors(conf, seedVectors);
+ seedVectors = SeedVectorUtil.loadSeedVectors(conf);
} catch (InstantiationException e) {
throw new IllegalStateException(e);
} catch (IllegalAccessException e) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java Sun Jul 17 15:06:01 2011
@@ -1,4 +1,3 @@
-package org.apache.mahout.math.hadoop.similarity;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +15,7 @@ package org.apache.mahout.math.hadoop.si
* limitations under the License.
*/
+package org.apache.mahout.math.hadoop.similarity;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -34,8 +34,6 @@ import org.apache.mahout.common.commandl
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.math.VectorWritable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.IOException;
@@ -44,7 +42,7 @@ import java.io.IOException;
* and emits a tuple of seed id, other id, distance. It is a more generic version of KMeans' mapper
*/
public class VectorDistanceSimilarityJob extends AbstractJob {
- private static final Logger log = LoggerFactory.getLogger(VectorDistanceSimilarityJob.class);
+
public static final String SEEDS = "seeds";
public static final String SEEDS_PATH_KEY = "seedsPath";
public static final String DISTANCE_MEASURE_KEY = "vectorDistSim.measure";
@@ -62,7 +60,10 @@ public class VectorDistanceSimilarityJob
addOption(DefaultOptionCreator.distanceMeasureOption().create());
addOption(SEEDS, "s", "The set of vectors to compute distances against. Must fit in memory on the mapper");
addOption(DefaultOptionCreator.overwriteOption().create());
- addOption(OUT_TYPE_KEY, "ot", "[pw|v] -- Define the output style: pairwise, the default, (pw) or vector (v). Pairwise is a tuple of <seed, other, distance>, vector is <other, <Vector of size the number of seeds>>.", "pw");
+ addOption(OUT_TYPE_KEY, "ot",
+ "[pw|v] -- Define the output style: pairwise, the default, (pw) or vector (v). Pairwise is a "
+ + "tuple of <seed, other, distance>, vector is <other, <Vector of size the number of seeds>>.",
+ "pw");
if (parseArguments(args) == null) {
return -1;
}
@@ -95,26 +96,27 @@ public class VectorDistanceSimilarityJob
Path input,
Path seeds,
Path output,
- DistanceMeasure measure, String outType) throws IOException, ClassNotFoundException, InterruptedException {
+ DistanceMeasure measure, String outType)
+ throws IOException, ClassNotFoundException, InterruptedException {
conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName());
conf.set(SEEDS_PATH_KEY, seeds.toString());
Job job = new Job(conf, "Vector Distance Similarity: seeds: " + seeds + " input: " + input);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
- if (outType.equalsIgnoreCase("pw")) {
+ if ("pw".equalsIgnoreCase(outType)) {
job.setMapOutputKeyClass(StringTuple.class);
job.setOutputKeyClass(StringTuple.class);
job.setMapOutputValueClass(DoubleWritable.class);
job.setOutputValueClass(DoubleWritable.class);
job.setMapperClass(VectorDistanceMapper.class);
- } else if (outType.equalsIgnoreCase("v")) {
+ } else if ("v".equalsIgnoreCase(outType)) {
job.setMapOutputKeyClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setMapOutputValueClass(VectorWritable.class);
job.setOutputValueClass(VectorWritable.class);
job.setMapperClass(VectorDistanceInvertedMapper.class);
} else {
- throw new InterruptedException("Invalid outType specified: " + outType);
+ throw new IllegalArgumentException("Invalid outType specified: " + outType);
}
@@ -125,7 +127,7 @@ public class VectorDistanceSimilarityJob
job.setJarByClass(VectorDistanceSimilarityJob.class);
HadoopUtil.delete(conf, output);
if (!job.waitForCompletion(true)) {
- throw new InterruptedException("VectorDistance Similarity failed processing " + seeds);
+ throw new IllegalStateException("VectorDistance Similarity failed processing " + seeds);
}
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrence.java Sun Jul 17 15:06:01 2011
@@ -27,7 +27,7 @@ import org.apache.mahout.math.Varint;
/**
* an entry in a row vector stored together with a precomputed weight of the row
*/
-class WeightedOccurrence implements Writable, Cloneable {
+final class WeightedOccurrence implements Writable, Cloneable {
private int row;
private double value;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/WeightedOccurrenceArray.java Sun Jul 17 15:06:01 2011
@@ -24,7 +24,7 @@ import org.apache.hadoop.io.ArrayWritabl
/**
* an array of {@link WeightedOccurrence}s
*/
-class WeightedOccurrenceArray extends ArrayWritable {
+final class WeightedOccurrenceArray extends ArrayWritable {
WeightedOccurrenceArray() {
super(WeightedOccurrence.class);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java Sun Jul 17 15:06:01 2011
@@ -68,7 +68,7 @@ public final class TrainAdaptiveLogistic
CsvRecordFactory csv = lmp.getCsvRecordFactory();
model = lmp.createAdaptiveLogisticRegression();
- State<Wrapper, CrossFoldLearner> best = null;
+ State<Wrapper, CrossFoldLearner> best;
CrossFoldLearner learner = null;
int k = 0;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java Sun Jul 17 15:06:01 2011
@@ -53,7 +53,7 @@ public final class Job extends AbstractJ
Path output = new Path("output");
Configuration conf = new Configuration();
HadoopUtil.delete(conf, output);
- new Job().run(conf, new Path("testdata"), output,
+ run(conf, new Path("testdata"), output,
new EuclideanDistanceMeasure(), new TriangularKernelProfile(), 47.6,
1, 0.5, 10);
}
@@ -130,10 +130,16 @@ public final class Job extends AbstractJ
* @param maxIterations
* the int maximum number of iterations
*/
- public void run(Configuration conf, Path input, Path output,
- DistanceMeasure measure, IKernelProfile kernelProfile, double t1,
- double t2, double convergenceDelta, int maxIterations)
- throws IOException, InterruptedException, ClassNotFoundException {
+ public static void run(Configuration conf,
+ Path input,
+ Path output,
+ DistanceMeasure measure,
+ IKernelProfile kernelProfile,
+ double t1,
+ double t2,
+ double convergenceDelta,
+ int maxIterations)
+ throws IOException, InterruptedException, ClassNotFoundException {
Path directoryContainingConvertedInput = new Path(output,
DIRECTORY_CONTAINING_CONVERTED_INPUT);
InputDriver.runJob(input, directoryContainingConvertedInput);
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1147646&r1=1147645&r2=1147646&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Sun Jul 17 15:06:01 2011
@@ -152,8 +152,6 @@ public final class VectorDumper {
}
writer.write('\n');
}
- long i = 0;
- long count = 0;
long numItems = Long.MAX_VALUE;
if (cmdLine.hasOption(numItemsOpt)) {
numItems = Long.parseLong(cmdLine.getValue(numItemsOpt).toString());
@@ -161,6 +159,8 @@ public final class VectorDumper {
}
SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<Writable, Writable>(path, true, conf);
Iterator<Pair<Writable,Writable>> iterator = iterable.iterator();
+ long i = 0;
+ long count = 0;
while (iterator.hasNext() && count < numItems) {
Pair<Writable, Writable> record = iterator.next();
Writable keyWritable = record.getFirst();