You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/06/12 22:44:21 UTC
svn commit: r1492416 [1/3] - in /mahout/trunk: ./
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/
core/src/main/java/org/apache/mahout/classifier/
core/src/main/java/org/apache/mahout/clas...
Author: ssc
Date: Wed Jun 12 20:44:19 2013
New Revision: 1492416
URL: http://svn.apache.org/r1492416
Log:
MAHOUT-1258 Another shot at findbugs and checkstyle
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java
mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java
mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java
mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/CardinalityException.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/Centroid.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/FileBasedSparseBinaryMatrix.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/Vector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Functions.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/function/PlusMult.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package-info.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/map/package-info.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/AbstractVectorTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/VectorBinaryAggregateTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/VectorBinaryAssignTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/randomized/RandomBlasting.java
Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Wed Jun 12 20:44:19 2013
@@ -2,9 +2,11 @@ Mahout Change Log
Release 0.8 - unreleased
-__MAHOUT-1253: Add experiment tools for StreamingKMeans, part 1 (dfilimon)
+ MAHOUT-1258: Another shot at findbugs and checkstyle (ssc)
- MAHOUT-884: Matrix Concatenate Utility (Lance Norskog, smarthi)
+ MAHOUT-1253: Add experiment tools for StreamingKMeans, part 1 (dfilimon)
+
+ MAHOUT-884: Matrix Concatenate Utility (Lance Norskog via smarthi)
MAHOUT-1250: Deprecate unused algorithms (ssc)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java Wed Jun 12 20:44:19 2013
@@ -58,8 +58,8 @@ public final class TasteHadoopUtils {
}
public static int readID(String token, boolean usesLongIDs) {
- return usesLongIDs ?
- TasteHadoopUtils.idToIndex(Long.parseLong(token))
+ return usesLongIDs
+ ? TasteHadoopUtils.idToIndex(Long.parseLong(token))
: Integer.parseInt(token);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java Wed Jun 12 20:44:19 2013
@@ -250,7 +250,7 @@ public class ParallelALSFactorizationJob
@Override
protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
result.set(ALS.sum(values.iterator()));
ctx.write(key, result);
}
@@ -263,7 +263,7 @@ public class ParallelALSFactorizationJob
@Override
protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Vector sum = ALS.sum(values.iterator());
result.set(new SequentialAccessSparseVector(sum));
ctx.write(key, result);
@@ -277,7 +277,7 @@ public class ParallelALSFactorizationJob
@Override
public void reduce(WritableComparable<?> key, Iterable<VectorWritable> vectors, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Vector merged = VectorWritable.merge(vectors.iterator()).get();
result.set(new SequentialAccessSparseVector(merged));
ctx.write(key, result);
@@ -330,7 +330,7 @@ public class ParallelALSFactorizationJob
if (implicitFeedback) {
solverMapperClassInternal = SolveImplicitFeedbackMapper.class;
name = "Recompute " + matrixName + ", iteration (" + (iterationNumber + 1) + '/' + numIterations + "), "
- + '(' + numThreadsPerSolver + " threads, " + numFeatures +" features, implicit feedback)";
+ + '(' + numThreadsPerSolver + " threads, " + numFeatures + " features, implicit feedback)";
} else {
solverMapperClassInternal = SolveExplicitFeedbackMapper.class;
name = "Recompute " + matrixName + ", iteration (" + (iterationNumber + 1) + '/' + numIterations + "), "
@@ -412,7 +412,7 @@ public class ParallelALSFactorizationJob
static class IDMapReducer extends Reducer<VarIntWritable,VarLongWritable,VarIntWritable,VarLongWritable> {
@Override
protected void reduce(VarIntWritable index, Iterable<VarLongWritable> ids, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
ctx.write(index, ids.iterator().next());
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Wed Jun 12 20:44:19 2013
@@ -99,7 +99,7 @@ public class ConfusionMatrix {
int count = 0;
double accuracy = 0;
for (String label: labelMap.keySet()) {
- if (! label.equals(defaultLabel)) {
+ if (!label.equals(defaultLabel)) {
accuracy += getAccuracy(label);
}
count++;
@@ -121,14 +121,14 @@ public class ConfusionMatrix {
public double getKappa() {
double a = 0.0;
double b = 0.0;
- for(int i = 0; i < confusionMatrix.length; i++) {
+ for (int i = 0; i < confusionMatrix.length; i++) {
a += confusionMatrix[i][i];
double br = 0;
- for(int j = 0; j < confusionMatrix.length; j++) {
+ for (int j = 0; j < confusionMatrix.length; j++) {
br += confusionMatrix[i][j];
}
double bc = 0;
- for(int j = 0; j < confusionMatrix.length; j++) {
+ for (int j = 0; j < confusionMatrix.length; j++) {
bc += confusionMatrix[j][i];
}
b += br * bc;
@@ -143,9 +143,9 @@ public class ConfusionMatrix {
*/
public RunningAverageAndStdDev getNormalizedStats() {
RunningAverageAndStdDev summer = new FullRunningAverageAndStdDev();
- for(int d = 0; d < confusionMatrix.length; d++) {
+ for (int d = 0; d < confusionMatrix.length; d++) {
double total = 0;
- for(int j = 0; j < confusionMatrix.length; j++) {
+ for (int j = 0; j < confusionMatrix.length; j++) {
total += confusionMatrix[d][j];
}
summer.addDatum(confusionMatrix[d][d] / (total + 0.000001));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Data.java Wed Jun 12 20:44:19 2013
@@ -26,7 +26,7 @@ import java.util.List;
import java.util.Random;
/**
- * Holds a list of vectors and their corresponding Dataset. contains various operations that deals with the
+ * Holds a list of vectors and their corresponding Dataset. contains various operations that deals with the
* vectors (subset, count,...)
*
*/
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java Wed Jun 12 20:44:19 2013
@@ -69,7 +69,7 @@ public class Dataset {
private static Attribute fromString(String from) {
Attribute toReturn = LABEL;
- if(NUMERICAL.toString().equalsIgnoreCase(from)) {
+ if (NUMERICAL.toString().equalsIgnoreCase(from)) {
toReturn = NUMERICAL;
} else if (CATEGORICAL.toString().equalsIgnoreCase(from)) {
toReturn = CATEGORICAL;
@@ -103,15 +103,14 @@ public class Dataset {
private int nbInstances;
/** JSON serial/de-serial-izer */
- private static final ObjectMapper objectMapper = new ObjectMapper();
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
// Some literals for JSON representation
- final static String TYPE = "type";
- final static String VALUES = "values";
- final static String LABEL = "label";
+ static final String TYPE = "type";
+ static final String VALUES = "values";
+ static final String LABEL = "label";
- protected Dataset() {
- }
+ protected Dataset() {}
/**
* Should only be called by a DataLoader
@@ -186,7 +185,7 @@ public class Dataset {
}
public Attribute getAttribute(int attr) {
- return attributes[attr];
+ return attributes[attr];
}
/**
@@ -215,7 +214,7 @@ public class Dataset {
}
public String toString() {
- return "attributes="+Arrays.toString(attributes);
+ return "attributes=" + Arrays.toString(attributes);
}
/**
@@ -323,7 +322,7 @@ public class Dataset {
FileSystem fs = path.getFileSystem(conf);
long bytesToRead = fs.getFileStatus(path).getLen();
- byte[] buff = new byte[new Long(bytesToRead).intValue()];
+ byte[] buff = new byte[Long.valueOf(bytesToRead).intValue()];
FSDataInputStream input = fs.open(path);
try {
input.readFully(buff);
@@ -361,7 +360,7 @@ public class Dataset {
toWrite.add(attribute);
}
try {
- return objectMapper.writeValueAsString(toWrite);
+ return OBJECT_MAPPER.writeValueAsString(toWrite);
} catch (Exception ex) {
throw new RuntimeException(ex);
}
@@ -377,7 +376,7 @@ public class Dataset {
Dataset dataset = new Dataset();
List<Map<String, Object>> fromJSON;
try {
- fromJSON = objectMapper.readValue(json, new TypeReference<List<Map<String, Object>>>() {});
+ fromJSON = OBJECT_MAPPER.readValue(json, new TypeReference<List<Map<String, Object>>>() {});
} catch (Exception ex) {
throw new RuntimeException(ex);
}
@@ -386,15 +385,15 @@ public class Dataset {
String[][] nominalValues = new String[fromJSON.size()][];
for (int i = 0; i < fromJSON.size(); i++) {
Map<String, Object> attribute = fromJSON.get(i);
- if(Attribute.fromString((String) attribute.get(TYPE)) == Attribute.IGNORED) {
+ if (Attribute.fromString((String) attribute.get(TYPE)) == Attribute.IGNORED) {
ignored.add(i);
} else {
Attribute asAttribute = Attribute.fromString((String) attribute.get(TYPE));
attributes.add(asAttribute);
- if((Boolean) attribute.get(LABEL)) {
+ if ((Boolean) attribute.get(LABEL)) {
dataset.labelId = i - ignored.size();
}
- if(attribute.get(VALUES) != null) {
+ if (attribute.get(VALUES) != null) {
List get = (List) attribute.get(VALUES);
String[] array = (String[]) get.toArray(new String[]{});
nominalValues[i] = array;
@@ -404,7 +403,7 @@ public class Dataset {
dataset.attributes = attributes.toArray(new Attribute[]{});
dataset.ignored = new int[ignored.size()];
dataset.values = nominalValues;
- for(int i = 0; i < dataset.ignored.length; i++) {
+ for (int i = 0; i < dataset.ignored.length; i++) {
dataset.ignored[i] = ignored.get(i);
}
return dataset;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java Wed Jun 12 20:44:19 2013
@@ -27,7 +27,6 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
@@ -204,17 +203,9 @@ public class Classifier {
if (files.length < 2) {
throw new IOException("not enough paths in the DistributedCache");
}
- LocalFileSystem localFs = FileSystem.getLocal(conf);
- if (!localFs.exists(files[0])) {//MAHOUT-992: this seems safe
- files[0] = localFs.makeQualified(new Path(DistributedCache.getCacheFiles(conf)[0].getPath()));
- }
-
dataset = Dataset.load(conf, files[0]);
-
converter = new DataConverter(dataset);
- if (!localFs.exists(files[1])) {//MAHOUT-992: this seems safe
- files[1] = localFs.makeQualified(new Path(DistributedCache.getCacheFiles(conf)[1].getPath()));
- }
+
forest = DecisionForest.load(conf, files[1]);
if (forest == null) {
throw new InterruptedException("DecisionForest not found!");
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/L2.java Wed Jun 12 20:44:19 2013
@@ -33,13 +33,13 @@ public class L2 implements PriorFunction
private double s;
public L2(double scale) {
- this.s = scale;
- this.s2 = scale * scale;
+ s = scale;
+ s2 = scale * scale;
}
public L2() {
- this.s = 1.0;
- this.s2 = 1.0;
+ s = 1.0;
+ s2 = 1.0;
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java Wed Jun 12 20:44:19 2013
@@ -133,8 +133,8 @@ public final class ClusteringUtils {
double maxDBIndex = 0;
for (int j = 0; j < n; ++j) {
if (i != j) {
- double dbIndex = (averageDistanceI + clusterDistanceSummaries.get(j).getMean()) /
- distanceMeasure.distance(centroids.get(i), centroids.get(j));
+ double dbIndex = (averageDistanceI + clusterDistanceSummaries.get(j).getMean())
+ / distanceMeasure.distance(centroids.get(i), centroids.get(j));
if (dbIndex > maxDBIndex) {
maxDBIndex = dbIndex;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Wed Jun 12 20:44:19 2013
@@ -371,10 +371,7 @@ public class CanopyDriver extends Abstra
boolean runSequential)
throws IOException, InterruptedException, ClassNotFoundException {
ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);
- ClusterClassificationDriver.run(conf,
- points,
- output,
- new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
+ ClusterClassificationDriver.run(conf, points, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY),
clusterClassificationThreshold, true, runSequential);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java Wed Jun 12 20:44:19 2013
@@ -124,12 +124,13 @@ public final class ClusterClassification
*/
public static void run(Path input, Path clusteringOutputPath, Path output, Double clusterClassificationThreshold,
boolean emitMostLikely, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
- Configuration conf = new Configuration();
- run(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely, runSequential);
+ Configuration conf = new Configuration();
+ run(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely, runSequential);
}
- public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output, Double clusterClassificationThreshold,
- boolean emitMostLikely, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
+ public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output,
+ Double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException {
if (runSequential) {
classifyClusterSeq(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
} else {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Wed Jun 12 20:44:19 2013
@@ -157,7 +157,7 @@ public class FuzzyKMeansDriver extends A
boolean emitMostLikely,
double threshold,
boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
- Configuration conf = new Configuration();
+ Configuration conf = new Configuration();
Path clustersOut = buildClusters(conf,
input,
clustersIn,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Wed Jun 12 20:44:19 2013
@@ -96,9 +96,6 @@ public class KMeansDriver extends Abstra
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
DefaultOptionCreator.SEQUENTIAL_METHOD);
- if (getConf() == null) {
- setConf(new Configuration());
- }
double clusterClassificationThreshold = 0.0;
if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java Wed Jun 12 20:44:19 2013
@@ -91,14 +91,6 @@ public class InMemoryCollapsedVariationa
public void setVerbose(boolean verbose) {
this.verbose = verbose;
}
-
- public InMemoryCollapsedVariationalBayes0(Matrix corpus,
- String[] terms,
- int numTopics,
- double alpha,
- double eta) {
- this(corpus, terms, numTopics, alpha, eta, 1, 1, 0);
- }
public InMemoryCollapsedVariationalBayes0(Matrix corpus,
String[] terms,
@@ -468,9 +460,6 @@ public class InMemoryCollapsedVariationa
@Override
public Configuration getConf() {
- if (super.getConf() == null) {
- setConf(new Configuration());
- }
return super.getConf();
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java Wed Jun 12 20:44:19 2013
@@ -247,7 +247,7 @@ public class TopicModel implements Confi
topicTermCounts.assignRow(x, new SequentialAccessSparseVector(numTerms));
}
topicSums.assign(1.0);
- if(threadPool.isTerminated()) {
+ if (threadPool.isTerminated()) {
initializeThreadPool();
}
}
@@ -262,7 +262,7 @@ public class TopicModel implements Confi
log.warn("Threadpool timed out on await termination - jobs still running!");
}
} catch (InterruptedException e) {
- log.error("Interrupted shutting down!", e);
+ log.error("Interrupted shutting down!", e);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashDriver.java Wed Jun 12 20:44:19 2013
@@ -57,8 +57,8 @@ public final class MinHashDriver extends
addOption(MIN_CLUSTER_SIZE, "mcs", "Minimum points inside a cluster", String.valueOf(10));
addOption(MIN_VECTOR_SIZE, "mvs", "Minimum size of vector to be hashed", String.valueOf(5));
- addOption(VECTOR_DIMENSION_TO_HASH, "vdh", "Dimension of vector to hash. Available types: (value, index). " +
- "Defaults to 'value'", HASH_DIMENSION_VALUE);
+ addOption(VECTOR_DIMENSION_TO_HASH, "vdh", "Dimension of vector to hash. Available types: (value, index). "
+ + "Defaults to 'value'", HASH_DIMENSION_VALUE);
addOption(HASH_TYPE, "ht", "Type of hash function to use. Available types: (linear, polynomial, murmur) ",
HashFactory.HashType.MURMUR.toString());
addOption(NUM_HASH_FUNCTIONS, "nh", "Number of hash functions to be used", String.valueOf(10));
@@ -110,7 +110,7 @@ public final class MinHashDriver extends
boolean succeeded = minHash.waitForCompletion(true);
if (!succeeded) {
- return -1;
+ return -1;
}
return 0;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Wed Jun 12 20:44:19 2013
@@ -212,7 +212,7 @@ public class EigencutsDriver extends Abs
/**
* Iteratively loops through the list, converting it to a Vector of double
- * primitives worthy of other Mahout operations
+ * primitives worthy of other Mahout operations
*/
private static Vector listToVector(Collection<Double> list) {
Vector retval = new DenseVector(list.size());
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityJob.java Wed Jun 12 20:44:19 2013
@@ -32,7 +32,7 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
/**
- * <p>There are a quite a few operations bundled within this mapper. Gather 'round
+ * <p>There are quite a few operations bundled within this mapper. Gather 'round
* and listen, all of ye.</p>
*
* <p>The input to this job is eight items:</p>
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java Wed Jun 12 20:44:19 2013
@@ -281,11 +281,11 @@ public class BallKMeans implements Itera
* @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
*/
private void initializeSeedsKMeansPlusPlus(List<? extends WeightedVector> datapoints) {
- Preconditions.checkArgument(datapoints.size() > 1, "Must have at least two datapoints points to cluster " +
- "sensibly");
+ Preconditions.checkArgument(datapoints.size() > 1, "Must have at least two datapoints points to cluster "
+ + "sensibly");
Preconditions.checkArgument(datapoints.size() >= numClusters,
String.format("Must have more datapoints [%d] than clusters [%d]", datapoints.size(), numClusters));
- // Compute the centroid of all of the datapoints. This is then used to compute the squared radius of the datapoints.
+ // Compute the centroid of all of the datapoints. This is then used to compute the squared radius of the datapoints.
Centroid center = new Centroid(datapoints.iterator().next());
for (WeightedVector row : Iterables.skip(datapoints, 1)) {
center.update(row);
@@ -446,8 +446,8 @@ public class BallKMeans implements Itera
return Iterators.transform(centroids.iterator(), new Function<Vector, Centroid>() {
@Override
public Centroid apply(Vector input) {
- Preconditions.checkArgument(input instanceof Centroid, "Non-centroid in centroids " +
- "searcher");
+ Preconditions.checkArgument(input instanceof Centroid, "Non-centroid in centroids "
+ + "searcher");
//noinspection ConstantConditions
return (Centroid)input;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java Wed Jun 12 20:44:19 2013
@@ -164,40 +164,40 @@ public final class StreamingKMeansDriver
// There will be k final clusters, but in the Map phase to get a good approximation of the data, O(k log n)
// clusters are needed. Since n is the number of data points and not knowable until reading all the vectors,
// provide a decent estimate.
- addOption(ESTIMATED_NUM_MAP_CLUSTERS, "km", "The estimated number of clusters to use for the " +
- "Map phase of the job when running StreamingKMeans. This should be around k * log(n), " +
- "where k is the final number of clusters and n is the total number of data points to " +
- "cluster.");
+ addOption(ESTIMATED_NUM_MAP_CLUSTERS, "km", "The estimated number of clusters to use for the "
+ + "Map phase of the job when running StreamingKMeans. This should be around k * log(n), "
+ + "where k is the final number of clusters and n is the total number of data points to "
+ + "cluster.");
- addOption(ESTIMATED_DISTANCE_CUTOFF, "e", "The initial estimated distance cutoff between two " +
- "points for forming new clusters. If no value is given, it's estimated from the data set",
+ addOption(ESTIMATED_DISTANCE_CUTOFF, "e", "The initial estimated distance cutoff between two "
+ + "points for forming new clusters. If no value is given, it's estimated from the data set",
String.valueOf(INVALID_DISTANCE_CUTOFF));
// BallKMeans (reducer) options
- addOption(MAX_NUM_ITERATIONS, "mi", "The maximum number of iterations to run for the " +
- "BallKMeans algorithm used by the reducer. If no value is given, defaults to 10.", String.valueOf(10));
+ addOption(MAX_NUM_ITERATIONS, "mi", "The maximum number of iterations to run for the "
+ + "BallKMeans algorithm used by the reducer. If no value is given, defaults to 10.", String.valueOf(10));
- addOption(TRIM_FRACTION, "tf", "The 'ball' aspect of ball k-means means that only the closest points " +
- "to the centroid will actually be used for updating. The fraction of the points to be used is those " +
- "points whose distance to the center is within trimFraction * distance to the closest other center. " +
- "If no value is given, defaults to 0.9.", String.valueOf(0.9));
-
- addFlag(RANDOM_INIT, "ri", "Whether to use k-means++ initialization or random initialization " +
- "of the seed centroids. Essentially, k-means++ provides better clusters, but takes longer, whereas random " +
- "initialization takes less time, but produces worse clusters, and tends to fail more often and needs " +
- "multiple runs to compare to k-means++. If set, uses the random initialization.");
-
- addFlag(IGNORE_WEIGHTS, "iw", "Whether to correct the weights of the centroids after the clustering is done. " +
- "The weights end up being wrong because of the trimFraction and possible train/test splits. In some cases, " +
- "especially in a pipeline, having an accurate count of the weights is useful. If set, ignores the final " +
- "weights");
-
- addOption(TEST_PROBABILITY, "testp", "A double value between 0 and 1 that represents the percentage of " +
- "points to be used for 'testing' different clustering runs in the final BallKMeans " +
- "step. If no value is given, defaults to 0.1", String.valueOf(0.1));
+ addOption(TRIM_FRACTION, "tf", "The 'ball' aspect of ball k-means means that only the closest points "
+ + "to the centroid will actually be used for updating. The fraction of the points to be used is those "
+ + "points whose distance to the center is within trimFraction * distance to the closest other center. "
+ + "If no value is given, defaults to 0.9.", String.valueOf(0.9));
+
+ addFlag(RANDOM_INIT, "ri", "Whether to use k-means++ initialization or random initialization "
+ + "of the seed centroids. Essentially, k-means++ provides better clusters, but takes longer, whereas random "
+ + "initialization takes less time, but produces worse clusters, and tends to fail more often and needs "
+ + "multiple runs to compare to k-means++. If set, uses the random initialization.");
+
+ addFlag(IGNORE_WEIGHTS, "iw", "Whether to correct the weights of the centroids after the clustering is done. "
+ + "The weights end up being wrong because of the trimFraction and possible train/test splits. In some cases, "
+ + "especially in a pipeline, having an accurate count of the weights is useful. If set, ignores the final "
+ + "weights");
+
+ addOption(TEST_PROBABILITY, "testp", "A double value between 0 and 1 that represents the percentage of "
+ + "points to be used for 'testing' different clustering runs in the final BallKMeans "
+ + "step. If no value is given, defaults to 0.1", String.valueOf(0.1));
- addOption(NUM_BALLKMEANS_RUNS, "nbkm", "Number of BallKMeans runs to use at the end to try to cluster the " +
- "points. If no value is given, defaults to 4", String.valueOf(4));
+ addOption(NUM_BALLKMEANS_RUNS, "nbkm", "Number of BallKMeans runs to use at the end to try to cluster the "
+ + "points. If no value is given, defaults to 4", String.valueOf(4));
// Nearest neighbor search options
// The distance measure used for computing the distance between two points. Generally, the
@@ -208,22 +208,22 @@ public final class StreamingKMeansDriver
// The default searcher should be something more efficient than BruteSearch (ProjectionSearch, ...). See
// o.a.m.math.neighborhood.*
- addOption(SEARCHER_CLASS_OPTION, "sc", "The type of searcher to be used when performing nearest " +
- "neighbor searches. Defaults to ProjectionSearch.", ProjectionSearch.class.getCanonicalName());
+ addOption(SEARCHER_CLASS_OPTION, "sc", "The type of searcher to be used when performing nearest "
+ + "neighbor searches. Defaults to ProjectionSearch.", ProjectionSearch.class.getCanonicalName());
// In the original paper, the authors used 1 projection vector.
- addOption(NUM_PROJECTIONS_OPTION, "np", "The number of projections considered in estimating the " +
- "distances between vectors. Only used when the distance measure requested is either " +
- "ProjectionSearch or FastProjectionSearch. If no value is given, defaults to 3.", String.valueOf(3));
-
- addOption(SEARCH_SIZE_OPTION, "s", "In more efficient searches (non BruteSearch), " +
- "not all distances are calculated for determining the nearest neighbors. The number of " +
- "elements whose distances from the query vector is actually computer is proportional to " +
- "searchSize. If no value is given, defaults to 1.", String.valueOf(2));
-
- addFlag(REDUCE_STREAMING_KMEANS, "rskm", "There might be too many intermediate clusters from the mapper " +
- "to fit into memory, so the reducer can run another pass of StreamingKMeans to collapse them down to a " +
- "fewer clusters");
+ addOption(NUM_PROJECTIONS_OPTION, "np", "The number of projections considered in estimating the "
+ + "distances between vectors. Only used when the distance measure requested is either "
+ + "ProjectionSearch or FastProjectionSearch. If no value is given, defaults to 3.", String.valueOf(3));
+
+ addOption(SEARCH_SIZE_OPTION, "s", "In more efficient searches (non BruteSearch), "
+ + "not all distances are calculated for determining the nearest neighbors. The number of "
+ + "elements whose distances from the query vector is actually computer is proportional to "
+ + "searchSize. If no value is given, defaults to 1.", String.valueOf(2));
+
+ addFlag(REDUCE_STREAMING_KMEANS, "rskm", "There might be too many intermediate clusters from the mapper "
+ + "to fit into memory, so the reducer can run another pass of StreamingKMeans to collapse them down to a "
+ + "fewer clusters");
addOption(DefaultOptionCreator.methodOption().create());
@@ -338,16 +338,16 @@ public final class StreamingKMeansDriver
Preconditions.checkArgument(numClusters > 0, "Invalid number of clusters requested");
// StreamingKMeans
- Preconditions.checkArgument(estimatedNumMapClusters > numClusters, "Invalid number of estimated map " +
- "clusters; There must be more than the final number of clusters (k log n vs k)");
+ Preconditions.checkArgument(estimatedNumMapClusters > numClusters, "Invalid number of estimated map "
+ + "clusters; There must be more than the final number of clusters (k log n vs k)");
Preconditions.checkArgument(estimatedDistanceCutoff == INVALID_DISTANCE_CUTOFF || estimatedDistanceCutoff > 0,
"estimatedDistanceCutoff cannot be negative");
// BallKMeans
Preconditions.checkArgument(maxNumIterations > 0, "Must have at least one BallKMeans iteration");
Preconditions.checkArgument(trimFraction > 0, "trimFraction must be positive");
- Preconditions.checkArgument(testProbability >= 0 && testProbability < 1, "test probability is not in the " +
- "interval [0, 1)");
+ Preconditions.checkArgument(testProbability >= 0 && testProbability < 1, "test probability is not in the "
+ + "interval [0, 1)");
Preconditions.checkArgument(numBallKMeansRuns > 0, "numBallKMeans cannot be negative");
// Searcher
@@ -386,12 +386,12 @@ public final class StreamingKMeansDriver
conf.setBoolean(REDUCE_STREAMING_KMEANS, reduceStreamingKMeans);
- log.info("Parameters are: [k] numClusters {}; " +
- "[SKM] estimatedNumMapClusters {}; estimatedDistanceCutoff {} " +
- "[BKM] maxNumIterations {}; trimFraction {}; randomInit {}; ignoreWeights {}; " +
- "testProbability {}; numBallKMeansRuns {}; " +
- "[S] measureClass {}; searcherClass {}; searcherSize {}; numProjections {}; " +
- "method {}; reduceStreamingKMeans {}", numClusters, estimatedNumMapClusters, estimatedDistanceCutoff,
+ log.info("Parameters are: [k] numClusters {}; "
+ + "[SKM] estimatedNumMapClusters {}; estimatedDistanceCutoff {} "
+ + "[BKM] maxNumIterations {}; trimFraction {}; randomInit {}; ignoreWeights {}; "
+ + "testProbability {}; numBallKMeansRuns {}; "
+ + "[S] measureClass {}; searcherClass {}; searcherSize {}; numProjections {}; "
+ + "method {}; reduceStreamingKMeans {}", numClusters, estimatedNumMapClusters, estimatedDistanceCutoff,
maxNumIterations, trimFraction, randomInit, ignoreWeights, testProbability, numBallKMeansRuns,
measureClass, searcherClass, searchSize, numProjections, method, reduceStreamingKMeans);
}
@@ -418,7 +418,7 @@ public final class StreamingKMeansDriver
}
private static int runSequentially(Configuration conf, Path input, Path output)
- throws IOException, ExecutionException, InterruptedException {
+ throws IOException, ExecutionException, InterruptedException {
long start = System.currentTimeMillis();
// Run StreamingKMeans step in parallel by spawning 1 thread per input path to process.
ExecutorService pool = Executors.newCachedThreadPool();
@@ -452,7 +452,8 @@ public final class StreamingKMeansDriver
}
@SuppressWarnings("unchecked")
- public static int runMapReduce(Configuration conf, Path input, Path output) throws IOException, ClassNotFoundException, InterruptedException {
+ public static int runMapReduce(Configuration conf, Path input, Path output)
+ throws IOException, ClassNotFoundException, InterruptedException {
// Prepare Job for submission.
Job job = HadoopUtil.prepareJob(input, output, SequenceFileInputFormat.class,
StreamingKMeansMapper.class, IntWritable.class, CentroidWritable.class,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java Wed Jun 12 20:44:19 2013
@@ -16,12 +16,8 @@ public class StreamingKMeansThread imple
private Iterable<Centroid> datapoints;
public StreamingKMeansThread(Path input, Configuration conf) {
- this.datapoints = StreamingKMeansUtilsMR.getCentroidsFromVectorWritable(new SequenceFileValueIterable<VectorWritable>(input, false, conf));
- this.conf = conf;
- }
-
- public StreamingKMeansThread(Iterable<Centroid> datapoints, Configuration conf) {
- this.datapoints = datapoints;
+ this.datapoints = StreamingKMeansUtilsMR.getCentroidsFromVectorWritable(
+ new SequenceFileValueIterable<VectorWritable>(input, false, conf));
this.conf = conf;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java Wed Jun 12 20:44:19 2013
@@ -53,8 +53,8 @@ public final class StreamingKMeansUtilsM
if (searcherClass.equals(BruteSearch.class.getName())) {
return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
new Class[]{DistanceMeasure.class}, new Object[]{distanceMeasure});
- } else if (searcherClass.equals(FastProjectionSearch.class.getName()) ||
- searcherClass.equals(ProjectionSearch.class.getName())) {
+ } else if (searcherClass.equals(FastProjectionSearch.class.getName())
+ || searcherClass.equals(ProjectionSearch.class.getName())) {
return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
new Class[]{DistanceMeasure.class, int.class, int.class},
new Object[]{distanceMeasure, numProjections, searchSize});
@@ -116,7 +116,7 @@ public final class StreamingKMeansUtilsM
* @throws java.io.IOException
*/
public static void writeCentroidsToSequenceFile(Iterable<Centroid> centroids, Path path, Configuration conf)
- throws IOException {
+ throws IOException {
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(FileSystem.get(conf), conf,
@@ -131,7 +131,7 @@ public final class StreamingKMeansUtilsM
}
public static void writeVectorsToSequenceFile(Iterable<? extends Vector> datapoints, Path path, Configuration conf)
- throws IOException {
+ throws IOException {
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(FileSystem.get(conf), conf,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java Wed Jun 12 20:44:19 2013
@@ -71,7 +71,8 @@ public final class ClusterCountReader {
* @param conf The hadoop configuration.
* @return A Map containing the final cluster ids.
*/
- public static Map<Integer, Integer> getClusterIDs(Path clusterOutputPath, Configuration conf, boolean keyIsClusterId) throws IOException {
+ public static Map<Integer, Integer> getClusterIDs(Path clusterOutputPath, Configuration conf, boolean keyIsClusterId)
+ throws IOException {
Map<Integer, Integer> clusterIds = new HashMap<Integer, Integer>();
FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
@@ -85,7 +86,7 @@ public final class ClusterCountReader {
int i = 0;
while (it.hasNext()) {
Integer key, value;
- if (keyIsClusterId == true) { // key is the cluster id, value is i, the index we will use
+ if (keyIsClusterId) { // key is the cluster id, value is i, the index we will use
key = it.next().getValue().getId();
value = i;
} else {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java Wed Jun 12 20:44:19 2013
@@ -74,12 +74,8 @@ public final class ClusterOutputPostProc
public void process() throws IOException {
createPostProcessDirectory();
for (Pair<?, WeightedVectorWritable> record
- : new SequenceFileDirIterable<Writable, WeightedVectorWritable>(clusteredPoints,
- PathType.GLOB,
- PathFilters.partFilter(),
- null,
- false,
- conf)) {
+ : new SequenceFileDirIterable<Writable, WeightedVectorWritable>(clusteredPoints, PathType.GLOB, PathFilters.partFilter(),
+ null, false, conf)) {
String clusterId = record.getFirst().toString().trim();
putVectorInRespectiveCluster(clusterId, record.getSecond());
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorDriver.java Wed Jun 12 20:44:19 2013
@@ -62,9 +62,6 @@ public final class ClusterOutputPostProc
Path input = getInputPath();
Path output = getOutputPath();
- if (getConf() == null) {
- setConf(new Configuration());
- }
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(getConf(), output);
}
@@ -89,10 +86,10 @@ public final class ClusterOutputPostProc
* Post processes the output of clustering algorithms and groups them into respective clusters. Each
* cluster's vectors are written into a directory named after its clusterId.
*
- * @param input The output path provided to the clustering algorithm, whose would be post processed. Hint : The
+ * @param input The output path provided to the clustering algorithm, which will be post processed. Hint: The
* path of the directory containing clusters-*-final and clusteredPoints.
* @param output The post processed data would be stored at this path.
- * @param runSequential If set to true, post processes it sequentially, else, uses. MapReduce. Hint : If the clustering
+ * @param runSequential If set to true, post processes it sequentially, else uses MapReduce. Hint: If the clustering
* was done sequentially, make it sequential, else vice versa.
*/
public static void run(Path input, Path output, boolean runSequential) throws IOException,
@@ -134,7 +131,8 @@ public final class ClusterOutputPostProc
private static void postProcessMR(Configuration conf, Path input, Path output) throws IOException,
InterruptedException,
ClassNotFoundException {
- System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, as the MapReduce option will not work properly");
+ System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, "
+ + "as the MapReduce option will not work properly");
int numberOfClusters = ClusterCountReader.getNumberOfClusters(input, conf);
conf.set("clusterOutputPath", input.toString());
Job job = new Job(conf, "ClusterOutputPostProcessor Driver running over input: " + input);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorMapper.java Wed Jun 12 20:44:19 2013
@@ -48,9 +48,10 @@ public class ClusterOutputPostProcessorM
}
@Override
- public void map(IntWritable key, WeightedVectorWritable val, Context context) throws IOException, InterruptedException {
- //by pivoting on the cluster mapping value, we can make sure that each unique cluster goes to it's own reducer, since they
- //are numbered from 0 to k-1, where k is the number of clusters
+ public void map(IntWritable key, WeightedVectorWritable val, Context context)
+ throws IOException, InterruptedException {
+ // by pivoting on the cluster mapping value, we can make sure that each unique cluster goes to its own reducer,
+ // since they are numbered from 0 to k-1, where k is the number of clusters
outputVector.set(val.getVector());
context.write(new IntWritable(newClusterMappings.get(key.get())), outputVector);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessorReducer.java Wed Jun 12 20:44:19 2013
@@ -29,8 +29,8 @@ import java.util.Map;
/**
* Reducer for post processing cluster output.
*/
-public class ClusterOutputPostProcessorReducer extends Reducer<IntWritable, VectorWritable, IntWritable, VectorWritable> {
-
+public class ClusterOutputPostProcessorReducer
+ extends Reducer<IntWritable, VectorWritable, IntWritable, VectorWritable> {
private Map<Integer, Integer> reverseClusterMappings;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Wed Jun 12 20:44:19 2013
@@ -114,7 +114,7 @@ public abstract class AbstractJob extend
private Group group;
protected AbstractJob() {
- options = new LinkedList<Option>();
+ options = Lists.newLinkedList();;
}
/** Returns the input path established by a call to {@link #parseArguments(String[])}.
@@ -415,19 +415,19 @@ public abstract class AbstractJob extend
return res;
}
- public int getInt(String optionName){
+ public int getInt(String optionName) {
return Integer.parseInt(getOption(optionName));
}
- public int getInt(String optionName, int defaultVal){
+ public int getInt(String optionName, int defaultVal) {
return Integer.parseInt(getOption(optionName, String.valueOf(defaultVal)));
}
- public float getFloat(String optionName){
+ public float getFloat(String optionName) {
return Float.parseFloat(getOption(optionName));
}
- public float getFloat(String optionName, float defaultVal){
+ public float getFloat(String optionName, float defaultVal) {
return Float.parseFloat(getOption(optionName, String.valueOf(defaultVal)));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java Wed Jun 12 20:44:19 2013
@@ -90,11 +90,8 @@ public final class TimingStatistics impl
+ "stdDev = " + DF.format(getStdDevTime() / 1000.0) + "us;";
}
- public Call newCall() {
- return new Call();
- }
-
- /** Ignores counting the performance metrics until leadTimeIsFinished The caller should enough time for the JIT to warm up. */
+ /** Ignores counting the performance metrics until leadTimeIsFinished. The caller should allow enough time for
+ * the JIT to warm up. */
public Call newCall(long leadTimeUsec) {
if (leadSumTime > leadTimeUsec) {
return new Call();
@@ -104,7 +101,7 @@ public final class TimingStatistics impl
}
/** Ignores counting the performance metrics. The caller should allow enough time for the JIT to warm up. */
- public class LeadTimeCall extends Call {
+ public final class LeadTimeCall extends Call {
private LeadTimeCall() { }
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java Wed Jun 12 20:44:19 2013
@@ -35,7 +35,7 @@ public interface DistanceMeasure extends
double distance(Vector v1, Vector v2);
/**
- * Optimized version of distance metric for sparse vectors. This distance computation requires operations
+ * Optimized version of distance metric for sparse vectors. This distance computation requires operations
* proportional to the number of non-zero elements in the vector instead of the cardinality of the vector.
*
* @param centroidLengthSquare
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterator.java Wed Jun 12 20:44:19 2013
@@ -32,6 +32,8 @@ import com.google.common.base.Charsets;
import com.google.common.collect.AbstractIterator;
import com.google.common.io.Closeables;
import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Iterates over the lines of a text file. This assumes the text file's lines are delimited in a manner
@@ -43,14 +45,17 @@ public final class FileLineIterator exte
private final BufferedReader reader;
- /**
- * Creates a over a given file, assuming a UTF-8 encoding.
- *
- * @throws java.io.FileNotFoundException
- * if the file does not exist
- * @throws IOException
- * if the file cannot be read
- */
+ private static final Logger log = LoggerFactory.getLogger(FileLineIterator.class);
+
+ /**
+ * Creates a {@link FileLineIterator} over a given file, assuming a UTF-8 encoding.
+ *
+ * @throws java.io.FileNotFoundException
+ * if the file does not exist
+ * @throws IOException
+ * if the file cannot be read
+ */
+
public FileLineIterator(File file) throws IOException {
this(file, Charsets.UTF_8, false);
}
@@ -115,7 +120,7 @@ public final class FileLineIterator exte
try {
close();
} catch (IOException e) {
- //we are throwing here anyway, so do nothing
+ log.error(e.getMessage(), e);
}
throw new IllegalStateException(ioe);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java Wed Jun 12 20:44:19 2013
@@ -30,6 +30,8 @@ import org.apache.hadoop.io.SequenceFile
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.mahout.common.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* <p>{@link java.util.Iterator} over a {@link SequenceFile}'s keys and values, as a {@link Pair}
@@ -47,9 +49,12 @@ public final class SequenceFileIterator<
private V value;
private final boolean reuseKeyValueInstances;
- /**
- * @throws IOException if path can't be read, or its key or value class can't be instantiated
- */
+ private static final Logger log = LoggerFactory.getLogger(SequenceFileIterator.class);
+
+ /**
+ * @throws IOException if path can't be read, or its key or value class can't be instantiated
+ */
+
public SequenceFileIterator(Path path, boolean reuseKeyValueInstances, Configuration conf) throws IOException {
key = null;
value = null;
@@ -104,7 +109,7 @@ public final class SequenceFileIterator<
try {
close();
} catch (IOException e) {
- //throwing next anyway
+ log.error(e.getMessage(), e);
}
throw new IllegalStateException(ioe);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java Wed Jun 12 20:44:19 2013
@@ -28,6 +28,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* <p>{@link java.util.Iterator} over a {@link SequenceFile}'s values only.</p>
@@ -41,9 +43,12 @@ public final class SequenceFileValueIter
private V value;
private final boolean reuseKeyValueInstances;
- /**
- * @throws IOException if path can't be read, or its key or value class can't be instantiated
- */
+ private static final Logger log = LoggerFactory.getLogger(SequenceFileValueIterator.class);
+
+ /**
+ * @throws IOException if path can't be read, or its key or value class can't be instantiated
+ */
+
public SequenceFileValueIterator(Path path, boolean reuseKeyValueInstances, Configuration conf) throws IOException {
value = null;
FileSystem fs = path.getFileSystem(conf);
@@ -83,7 +88,7 @@ public final class SequenceFileValueIter
try {
close();
} catch (IOException e) {
- //throw the original exception next
+ log.error(e.getMessage(), e);
}
throw new IllegalStateException(ioe);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java Wed Jun 12 20:44:19 2013
@@ -66,7 +66,7 @@ import java.util.concurrent.Future;
* @param <T> The payload class.
*/
public class EvolutionaryProcess<T extends Payload<U>, U> implements Writable, Closeable {
- // used to execute operations on the population in thread parallel.
+ // used to execute operations on the population in thread parallel.
private ExecutorService pool;
// threadCount is serialized so that we can reconstruct the thread pool
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java Wed Jun 12 20:44:19 2013
@@ -34,7 +34,7 @@ import java.util.concurrent.atomic.Atomi
/**
* Records evolutionary state and provides a mutation operation for recorded-step meta-mutation.
*
- * You provide the payload, this class provides the mutation operations. During mutation,
+ * You provide the payload, this class provides the mutation operations. During mutation,
* the payload is copied and after the state variables are changed, they are passed to the
* payload.
*
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java Wed Jun 12 20:44:19 2013
@@ -221,7 +221,7 @@ public final class PFPGrowth {
int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
int maxPerGroup = fList.size() / numGroups;
if (fList.size() % numGroups != 0) {
- maxPerGroup++;
+ maxPerGroup++;
}
params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java Wed Jun 12 20:44:19 2013
@@ -23,6 +23,7 @@ import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
+import com.google.common.collect.Sets;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
@@ -46,7 +47,7 @@ public class ParallelCountingMapper exte
InterruptedException {
String[] items = splitter.split(input.toString());
- Set<String> uniqueItems = new HashSet<String>(Arrays.asList(items));
+ Set<String> uniqueItems = Sets.newHashSet(Arrays.asList(items));
for (String item : uniqueItems) {
if (item.trim().isEmpty()) {
continue;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java Wed Jun 12 20:44:19 2013
@@ -47,7 +47,7 @@ import com.google.common.collect.Iterato
/**
* DistributedRowMatrix is a FileSystem-backed VectorIterable in which the vectors live in a
- * SequenceFile<WritableComparable,VectorWritable>, and distributed operations are executed as M/R passes on
+ * SequenceFile<WritableComparable,VectorWritable>, and distributed operations are executed as M/R passes on
* Hadoop. The usage is as follows: <p>
* <p>
* <pre>
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/VectorDistanceSimilarityJob.java Wed Jun 12 20:44:19 2013
@@ -85,9 +85,6 @@ public class VectorDistanceSimilarityJob
HadoopUtil.delete(getConf(), output);
}
DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
- if (getConf() == null) {
- setConf(new Configuration());
- }
String outType = getOption(OUT_TYPE_KEY, "pw");
Double maxDistance = null;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java Wed Jun 12 20:44:19 2013
@@ -27,6 +27,7 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.regex.Matcher;
+import com.google.common.collect.Lists;
import org.apache.commons.lang3.Validate;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
@@ -252,7 +253,7 @@ public final class ABtDenseOutJob {
* conditions will never kick in. Or, the only situation where we
* can't fit Y_i block in memory is when A input is much sparser
* than k+p per row. But if this is the case, then we'd be looking
- * at very few elements without engaging them in any operations so
+ * at very few elements without engaging them in any operations so
* even then it should be ok.
*/
if (j < aRowBegin) {
@@ -364,7 +365,7 @@ public final class ABtDenseOutJob {
NUMBER_FORMAT.setGroupingUsed(false);
}
- private final Deque<Closeable> closeables = new LinkedList<Closeable>();
+ private final Deque<Closeable> closeables = Lists.newLinkedList();
protected int blockHeight;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java Wed Jun 12 20:44:19 2013
@@ -26,6 +26,7 @@ import java.util.Deque;
import java.util.LinkedList;
import java.util.regex.Matcher;
+import com.google.common.collect.Lists;
import org.apache.commons.lang3.Validate;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
@@ -293,7 +294,7 @@ public final class ABtJob {
NUMBER_FORMAT.setGroupingUsed(false);
}
- private final Deque<Closeable> closeables = new LinkedList<Closeable>();
+ private final Deque<Closeable> closeables = Lists.newLinkedList();
protected final SparseRowBlockWritable accum = new SparseRowBlockWritable();
protected int blockHeight;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java Wed Jun 12 20:44:19 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.Deque;
import java.util.LinkedList;
+import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile.CompressionType;
@@ -76,7 +77,7 @@ public final class QJob {
Mapper<Writable, VectorWritable, SplitPartitionedWritable, VectorWritable> {
private MultipleOutputs outputs;
- private final Deque<Closeable> closeables = new LinkedList<Closeable>();
+ private final Deque<Closeable> closeables = Lists.newLinkedList();
private SplitPartitionedWritable qHatKey;
private SplitPartitionedWritable rHatKey;
private Vector yRow;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java Wed Jun 12 20:44:19 2013
@@ -62,7 +62,7 @@ public final class YtYJob {
* we keep yRow in a dense form here but keep an eye not to dense up while
* doing YtY products. I am not sure that sparse vector would create much
* performance benefits since we must assume that y would be more often
- * dense than sparse, so for bulk dense operations that would perform
+ * dense than sparse, so for bulk dense operations that would perform
* somewhat better than a RandomAccessSparse vector frequent updates.
*/
private Vector yRow;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GivensThinSolver.java Wed Jun 12 20:44:19 2013
@@ -30,8 +30,8 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.hadoop.stochasticsvd.UpperTriangular;
/**
- * Givens Thin solver. Standard Givens operations are reordered in a way that
- * helps us to push them thru MapReduce operations in a block fashion.
+ * Givens Thin solver. Standard Givens operations are reordered in a way that
+ * helps us to push them thru MapReduce operations in a block fashion.
*/
public class GivensThinSolver {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java Wed Jun 12 20:44:19 2013
@@ -71,7 +71,8 @@ public class BruteSearch extends Updatab
limit = Math.min(limit, referenceVectors.size());
// A priority queue of the best @limit elements, ordered from worst to best so that the worst
// element is always on top and can easily be removed.
- PriorityQueue<WeightedThing<Integer>> bestNeighbors = new PriorityQueue<WeightedThing<Integer>>(limit, Ordering.natural().reverse());
+ PriorityQueue<WeightedThing<Integer>> bestNeighbors =
+ new PriorityQueue<WeightedThing<Integer>>(limit, Ordering.natural().reverse());
// The resulting list of weighted WeightedVectors (the weight is the distance from the query).
List<WeightedThing<Vector>> results =
Lists.newArrayListWithCapacity(limit);