You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/08/30 20:41:48 UTC
svn commit: r990892 [1/2] - in /mahout/trunk: buildtools/src/main/resources/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/
core/src/main/java/org/apache/mahout/clusterin...
Author: srowen
Date: Mon Aug 30 18:41:46 2010
New Revision: 990892
URL: http://svn.apache.org/viewvc?rev=990892&view=rev
Log:
More assault on PMD / checkstyle warnings
Modified:
mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java
mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml
mahout/trunk/etc/findbugs-exclude.xml
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java
mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java
mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/KnownDoubleQuantileEstimator.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix1D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix1D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/engine/MersenneTwisterTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java
mahout/trunk/maven/src/main/resources/findbugs-exclude.xml
mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMergeReducer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizer.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
Modified: mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml (original)
+++ mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml Mon Aug 30 18:41:46 2010
@@ -24,7 +24,7 @@
<rule ref="rulesets/basic.xml/BooleanInstantiation"/>
<rule ref="rulesets/basic.xml/CollapsibleIfStatements"/>
<rule ref="rulesets/basic.xml/DoubleCheckedLocking"/>
- <rule ref="rulesets/basic.xml/EmptyCatchBlock"/>
+ <!--<rule ref="rulesets/basic.xml/EmptyCatchBlock"/>-->
<rule ref="rulesets/basic.xml/EmptyFinallyBlock"/>
<rule ref="rulesets/basic.xml/EmptyIfStmt"/>
<rule ref="rulesets/basic.xml/EmptyStatementNotInLoop"/>
@@ -52,7 +52,7 @@
<!--<rule ref="rulesets/clone.xml/CloneThrowsCloneNotSupportedException"/>-->
<!--<rule ref="rulesets/clone.xml/ProperCloneImplementation"/>-->
- <rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>
+ <!--<rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>-->
<rule ref="rulesets/codesize.xml/ExcessiveClassLength"/>
<rule ref="rulesets/codesize.xml/ExcessiveMethodLength"/>
<rule ref="rulesets/codesize.xml/ExcessiveParameterList"/>
@@ -78,7 +78,7 @@
<!--<rule ref="rulesets/design.xml/AbstractClassWithoutAbstractMethod"/>-->
<!--<rule ref="rulesets/design.xml/AccessorClassGeneration"/>-->
<!--<rule ref="rulesets/design.xml/AssignmentToNonFinalStatic"/>-->
- <rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>
+ <!--<rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>-->
<!--<rule ref="rulesets/design.xml/AvoidInstanceofChecksInCatchClause"/>-->
<rule ref="rulesets/design.xml/AvoidProtectedFieldInFinalClass"/>
<!--<rule ref="rulesets/design.xml/AvoidReassigningParameters"/>-->
@@ -153,8 +153,8 @@
<!--<rule ref="rulesets/naming.xml/MethodWithSameNameAsEnclosingClass"/>-->
<!--<rule ref="rulesets/naming.xml/SuspiciousHashcodeMethodName"/>-->
<!--<rule ref="rulesets/naming.xml/SuspiciousConstantFieldName"/>-->
- <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>
- <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>
+ <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>-->
+ <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>-->
<!-- <rule ref="rulesets/naming.xml/AvoidNonConstructorMethodsWithClassName"/> -->
<rule ref="rulesets/naming.xml/NoPackage"/>
<rule ref="rulesets/naming.xml/PackageCase"/>
@@ -163,7 +163,7 @@
<!--<rule ref="rulesets/optimizations.xml/MethodArgumentCouldBeFinal"/>-->
<!--<rule ref="rulesets/optimizations.xml/AvoidInstantiatingObjectsInLoops"/>-->
<!--<rule ref="rulesets/optimizations.xml/UseArrayListInsteadOfVector"/>-->
- <rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>
+ <!--<rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>-->
<rule ref="rulesets/optimizations.xml/UseStringBufferForStringAppends"/>
<!--<rule ref="rulesets/strictexception.xml/AvoidCatchingThrowable"/>-->
@@ -173,7 +173,7 @@
<!--<rule ref="rulesets/strictexception.xml/AvoidThrowingRawExceptionTypes"/>-->
<!--<rule ref="rulesets/strictexception.xml/AvoidThrowingNullPointerException"/>-->
- <rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>
+ <!--<rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>-->
<rule ref="rulesets/strings.xml/StringInstantiation"/>
<rule ref="rulesets/strings.xml/StringToString"/>
<!-- <rule ref="rulesets/strings.xml/AvoidConcatenatingNonLiteralsInStringBuffer"/>-->
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Mon Aug 30 18:41:46 2010
@@ -80,24 +80,24 @@ public final class AggregateAndRecommend
FSDataInputStream in = null;
try {
- String itemFilePathString = jobConf.get(ITEMS_FILE);
- if (itemFilePathString == null) {
- itemsToRecommendFor = null;
- } else {
- Path unqualifiedItemsFilePath = new Path(itemFilePathString);
- FileSystem fs = FileSystem.get(unqualifiedItemsFilePath.toUri(), jobConf);
- itemsToRecommendFor = new FastIDSet();
- Path itemsFilePath = unqualifiedItemsFilePath.makeQualified(fs);
- in = fs.open(itemsFilePath);
- for (String line : new FileLineIterable(in)) {
- itemsToRecommendFor.add(Long.parseLong(line));
- }
+ String itemFilePathString = jobConf.get(ITEMS_FILE);
+ if (itemFilePathString == null) {
+ itemsToRecommendFor = null;
+ } else {
+ Path unqualifiedItemsFilePath = new Path(itemFilePathString);
+ FileSystem fs = FileSystem.get(unqualifiedItemsFilePath.toUri(), jobConf);
+ itemsToRecommendFor = new FastIDSet();
+ Path itemsFilePath = unqualifiedItemsFilePath.makeQualified(fs);
+ in = fs.open(itemsFilePath);
+ for (String line : new FileLineIterable(in)) {
+ itemsToRecommendFor.add(Long.parseLong(line));
}
- } catch (IOException ioe) {
- throw new IllegalStateException(ioe);
- } finally {
- IOUtils.closeStream(in);
}
+ } catch (IOException ioe) {
+ throw new IllegalStateException(ioe);
+ } finally {
+ IOUtils.closeStream(in);
+ }
}
private static final UnaryFunction ABSOLUTE_VALUES = new UnaryFunction() {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon Aug 30 18:41:46 2010
@@ -86,12 +86,12 @@ public final class ItemSimilarityJob ext
addInputOption();
addOutputOption();
- addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use " +
- "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
- addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar items per item to this number " +
- "(default: " + DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM + ')', String.valueOf(DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM));
- addOption("maxCooccurrencesPerItem", "o", "try to cap the number of cooccurrences per item to this number " +
- "(default: " + DEFAULT_MAX_COOCCURRENCES_PER_ITEM + ')', String.valueOf(DEFAULT_MAX_COOCCURRENCES_PER_ITEM));
+ addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use "
+ + "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
+ addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar items per item to this number "
+ + "(default: " + DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM + ')', String.valueOf(DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM));
+ addOption("maxCooccurrencesPerItem", "o", "try to cap the number of cooccurrences per item to this number "
+ + "(default: " + DEFAULT_MAX_COOCCURRENCES_PER_ITEM + ')', String.valueOf(DEFAULT_MAX_COOCCURRENCES_PER_ITEM));
addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
Map<String,String> parsedArgs = parseArguments(args);
@@ -137,9 +137,9 @@ public final class ItemSimilarityJob ext
VarIntWritable.class,
NullWritable.class,
TextOutputFormat.class);
- countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
- countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
- countUsers.waitForCompletion(true);
+ countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
+ countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
+ countUsers.waitForCompletion(true);
}
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -178,12 +178,12 @@ public final class ItemSimilarityJob ext
/* Once DistributedRowMatrix uses the hadoop 0.20 API, we should refactor this call to something like
* new DistributedRowMatrix(...).rowSimilarity(...) */
ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] {
- "-Dmapred.input.dir=" + itemUserMatrixPath.toString(),
- "-Dmapred.output.dir=" + similarityMatrixPath.toString(),
- "--numberOfColumns", String.valueOf(numberOfUsers),
- "--similarityClassname", similarityClassName,
- "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem + 1),
- "--tempDir", tempDirPath.toString() });
+ "-Dmapred.input.dir=" + itemUserMatrixPath.toString(),
+ "-Dmapred.output.dir=" + similarityMatrixPath.toString(),
+ "--numberOfColumns", String.valueOf(numberOfUsers),
+ "--similarityClassname", similarityClassName,
+ "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem + 1),
+ "--tempDir", tempDirPath.toString() });
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
Job mostSimilarItems = prepareJob(similarityMatrixPath,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java Mon Aug 30 18:41:46 2010
@@ -34,10 +34,6 @@ import org.apache.mahout.math.VectorWrit
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-/**
- * @author jeff
- *
- */
public class CanopyClusterer {
private static final Logger log = LoggerFactory.getLogger(CanopyClusterer.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java Mon Aug 30 18:41:46 2010
@@ -23,7 +23,6 @@ import java.util.Collection;
import java.util.List;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.clustering.Cluster;
@@ -283,13 +282,11 @@ public class DirichletClusterer {
* @param vector a VectorWritable holding the Vector
* @param clusters a List of DirichletClusters
* @param context a Mapper.Context to emit to
- * @throws IOException
- * @throws InterruptedException
*/
public void emitPointToClusters(VectorWritable vector,
List<DirichletCluster> clusters,
- Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
- throws IOException, InterruptedException {
+ Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
+ throws IOException, InterruptedException {
Vector pi = new DenseVector(clusters.size());
for (int i = 0; i < clusters.size(); i++) {
pi.set(i, clusters.get(i).getModel().pdf(vector));
@@ -309,14 +306,12 @@ public class DirichletClusterer {
* @param clusters a List of DirichletClusters
* @param pi the normalized pdf Vector for the point
* @param context a Mapper.Context to emit to
- * @throws IOException
- * @throws InterruptedException
*/
private void emitMostLikelyCluster(VectorWritable point,
Collection<DirichletCluster> clusters,
Vector pi,
- Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
- throws IOException, InterruptedException {
+ Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
+ throws IOException, InterruptedException {
int clusterId = -1;
double clusterPdf = 0;
for (int i = 0; i < clusters.size(); i++) {
@@ -336,14 +331,12 @@ public class DirichletClusterer {
* @param clusters a List of DirichletClusters
* @param pi the normalized pdf Vector for the point
* @param context a Mapper.Context to emit to
- * @throws IOException
- * @throws InterruptedException
*/
private void emitAllClusters(VectorWritable point,
List<DirichletCluster> clusters,
Vector pi,
- Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
- throws IOException, InterruptedException {
+ Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
+ throws IOException, InterruptedException {
for (int i = 0; i < clusters.size(); i++) {
double pdf = pi.get(i);
if (pdf > threshold && clusters.get(i).getTotalCount() > 0) {
@@ -359,9 +352,9 @@ public class DirichletClusterer {
* @param vector a VectorWritable holding the Vector
* @param clusters a List of DirichletClusters
* @param writer a SequenceFile.Writer to emit to
- * @throws IOException
*/
- public void emitPointToClusters(VectorWritable vector, List<DirichletCluster> clusters, Writer writer) throws IOException {
+ public void emitPointToClusters(VectorWritable vector, List<DirichletCluster> clusters, Writer writer)
+ throws IOException {
Vector pi = new DenseVector(clusters.size());
for (int i = 0; i < clusters.size(); i++) {
pi.set(i, clusters.get(i).getModel().pdf(vector));
@@ -381,9 +374,9 @@ public class DirichletClusterer {
* @param clusters a List of DirichletClusters
* @param pi the normalized pdf Vector for the point
* @param writer a SequenceFile.Writer to emit to
- * @throws IOException
*/
- private void emitAllClusters(VectorWritable vector, List<DirichletCluster> clusters, Vector pi, Writer writer) throws IOException {
+ private void emitAllClusters(VectorWritable vector, List<DirichletCluster> clusters, Vector pi, Writer writer)
+ throws IOException {
for (int i = 0; i < clusters.size(); i++) {
double pdf = pi.get(i);
if (pdf > threshold && clusters.get(i).getTotalCount() > 0) {
@@ -400,10 +393,11 @@ public class DirichletClusterer {
* @param clusters a List of DirichletClusters
* @param pi the normalized pdf Vector for the point
* @param writer a SequenceFile.Writer to emit to
- * @throws IOException
*/
- private void emitMostLikelyCluster(VectorWritable vector, List<DirichletCluster> clusters, Vector pi, Writer writer)
- throws IOException {
+ private static void emitMostLikelyCluster(VectorWritable vector,
+ Collection<DirichletCluster> clusters,
+ Vector pi,
+ Writer writer) throws IOException {
double maxPdf = 0;
int clusterId = -1;
for (int i = 0; i < clusters.size(); i++) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Mon Aug 30 18:41:46 2010
@@ -83,7 +83,8 @@ public class DirichletDriver extends Abs
}
@Override
- public int run(String[] args) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
+ public int run(String[] args)
+ throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
NoSuchMethodException, InvocationTargetException, InterruptedException {
addInputOption();
addOutputOption();
@@ -125,7 +126,8 @@ public class DirichletDriver extends Abs
double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
double alpha0 = Double.parseDouble(getOption(ALPHA_OPTION));
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
- boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD));
+ boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION)
+ .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD));
int prototypeSize = readPrototypeSize(input);
AbstractVectorModelDistribution modelDistribution = createModelDistribution(modelFactory,
@@ -217,8 +219,9 @@ public class DirichletDriver extends Abs
boolean runClustering,
boolean emitMostLikely,
double threshold,
- boolean runSequential) throws ClassNotFoundException, InstantiationException, IllegalAccessException,
- IOException, SecurityException, NoSuchMethodException, InvocationTargetException, InterruptedException {
+ boolean runSequential)
+ throws ClassNotFoundException, InstantiationException, IllegalAccessException,
+ IOException, NoSuchMethodException, InvocationTargetException, InterruptedException {
new DirichletDriver().job(input,
output,
@@ -242,9 +245,7 @@ public class DirichletDriver extends Abs
* @param alpha0 the double alpha_0 argument to the algorithm
* @return an initialized DirichletState
*/
- static DirichletState createState(ModelDistribution<VectorWritable> modelDistribution, int numModels, double alpha0)
- throws ClassNotFoundException, InstantiationException, IllegalAccessException, SecurityException, NoSuchMethodException,
- IllegalArgumentException, InvocationTargetException {
+ static DirichletState createState(ModelDistribution<VectorWritable> modelDistribution, int numModels, double alpha0) {
return new DirichletState(modelDistribution, numModels, alpha0);
}
@@ -277,18 +278,17 @@ public class DirichletDriver extends Abs
* @param numModels the int number of models to generate
* @param alpha0 the double alpha_0 argument to the DirichletDistribution
*/
- private void writeInitialState(Path output,
- Path stateOut,
- ModelDistribution<VectorWritable> modelDistribution,
- int numModels,
- double alpha0) throws ClassNotFoundException, InstantiationException, IllegalAccessException,
- IOException, SecurityException, NoSuchMethodException, InvocationTargetException {
+ private static void writeInitialState(Path output,
+ Path stateOut,
+ ModelDistribution<VectorWritable> modelDistribution,
+ int numModels,
+ double alpha0) throws IOException {
DirichletState state = createState(modelDistribution, numModels, alpha0);
writeState(output, stateOut, numModels, state);
}
- private void writeState(Path output, Path stateOut, int numModels, DirichletState state) throws IOException {
+ private static void writeState(Path output, Path stateOut, int numModels, DirichletState state) throws IOException {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(output.toUri(), conf);
for (int i = 0; i < numModels; i++) {
@@ -310,13 +310,13 @@ public class DirichletDriver extends Abs
* @param alpha0 alpha_0
* @param numReducers the number of Reducers desired
*/
- private void runIteration(Path input,
- Path stateIn,
- Path stateOut,
- ModelDistribution<VectorWritable> modelDistribution,
- int numClusters,
- double alpha0,
- int numReducers) throws IOException, InterruptedException, ClassNotFoundException {
+ private static void runIteration(Path input,
+ Path stateIn,
+ Path stateOut,
+ ModelDistribution<VectorWritable> modelDistribution,
+ int numClusters,
+ double alpha0,
+ int numReducers) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
conf.set(STATE_IN_KEY, stateIn.toString());
conf.set(MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString());
@@ -378,7 +378,8 @@ public class DirichletDriver extends Abs
boolean runClustering,
boolean emitMostLikely,
double threshold,
- boolean runSequential) throws IOException, InstantiationException, IllegalAccessException,
+ boolean runSequential)
+ throws IOException, InstantiationException, IllegalAccessException,
ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InterruptedException {
Path clustersOut = buildClusters(input,
output,
@@ -389,7 +390,12 @@ public class DirichletDriver extends Abs
numReducers,
runSequential);
if (runClustering) {
- clusterData(input, clustersOut, new Path(output, Cluster.CLUSTERED_POINTS_DIR), emitMostLikely, threshold, runSequential);
+ clusterData(input,
+ clustersOut,
+ new Path(output, Cluster.CLUSTERED_POINTS_DIR),
+ emitMostLikely,
+ threshold,
+ runSequential);
}
}
@@ -413,34 +419,42 @@ public class DirichletDriver extends Abs
* @param runSequential execute sequentially if true
* @return the Path of the final clusters directory
*/
- public Path buildClusters(Path input,
- Path output,
- ModelDistribution<VectorWritable> modelDistribution,
- int numClusters,
- int maxIterations,
- double alpha0,
- int numReducers,
- boolean runSequential) throws IOException, InstantiationException, IllegalAccessException,
+ public static Path buildClusters(Path input,
+ Path output,
+ ModelDistribution<VectorWritable> modelDistribution,
+ int numClusters,
+ int maxIterations,
+ double alpha0,
+ int numReducers,
+ boolean runSequential)
+ throws IOException, InstantiationException, IllegalAccessException,
ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InterruptedException {
Path clustersIn = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
writeInitialState(output, clustersIn, modelDistribution, numClusters, alpha0);
if (runSequential) {
- clustersIn = buildClustersSeq(input, output, modelDistribution, numClusters, maxIterations, alpha0, numReducers, clustersIn);
+ clustersIn = buildClustersSeq(input, output, modelDistribution, numClusters, maxIterations, alpha0, clustersIn);
} else {
- clustersIn = buildClustersMR(input, output, modelDistribution, numClusters, maxIterations, alpha0, numReducers, clustersIn);
+ clustersIn = buildClustersMR(input,
+ output,
+ modelDistribution,
+ numClusters,
+ maxIterations,
+ alpha0,
+ numReducers,
+ clustersIn);
}
return clustersIn;
}
- private Path buildClustersSeq(Path input,
- Path output,
- ModelDistribution<VectorWritable> modelDistribution,
- int numClusters,
- int maxIterations,
- double alpha0,
- int numReducers,
- Path clustersIn) throws IOException, ClassNotFoundException, InstantiationException,
+ private static Path buildClustersSeq(Path input,
+ Path output,
+ ModelDistribution<VectorWritable> modelDistribution,
+ int numClusters,
+ int maxIterations,
+ double alpha0,
+ Path clustersIn)
+ throws IOException, ClassNotFoundException, InstantiationException,
IllegalAccessException, NoSuchMethodException, InvocationTargetException {
for (int iteration = 1; iteration <= maxIterations; iteration++) {
log.info("Iteration {}", iteration);
@@ -478,14 +492,15 @@ public class DirichletDriver extends Abs
return clustersIn;
}
- private Path buildClustersMR(Path input,
- Path output,
- ModelDistribution<VectorWritable> modelDistribution,
- int numClusters,
- int maxIterations,
- double alpha0,
- int numReducers,
- Path clustersIn) throws IOException, InterruptedException, ClassNotFoundException {
+ private static Path buildClustersMR(Path input,
+ Path output,
+ ModelDistribution<VectorWritable> modelDistribution,
+ int numClusters,
+ int maxIterations,
+ double alpha0,
+ int numReducers,
+ Path clustersIn)
+ throws IOException, InterruptedException, ClassNotFoundException {
for (int iteration = 1; iteration <= maxIterations; iteration++) {
log.info("Iteration {}", iteration);
// point the output to a new directory per iteration
@@ -512,8 +527,13 @@ public class DirichletDriver extends Abs
* a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
* @param runSequential execute sequentially if true
*/
- public void clusterData(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold, boolean runSequential)
- throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+ public void clusterData(Path input,
+ Path stateIn,
+ Path output,
+ boolean emitMostLikely,
+ double threshold,
+ boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
if (runSequential) {
clusterDataSeq(input, stateIn, output, emitMostLikely, threshold);
} else {
@@ -521,8 +541,8 @@ public class DirichletDriver extends Abs
}
}
- private void clusterDataSeq(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold) throws IOException,
- InstantiationException, IllegalAccessException {
+ private static void clusterDataSeq(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold)
+ throws IOException, InstantiationException, IllegalAccessException {
Configuration conf = new Configuration();
List<DirichletCluster> clusters = DirichletClusterMapper.loadClusters(conf, stateIn);
DirichletClusterer clusterer = new DirichletClusterer(emitMostLikely, threshold);
@@ -552,8 +572,8 @@ public class DirichletDriver extends Abs
}
- private void clusterDataMR(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold) throws IOException,
- InterruptedException, ClassNotFoundException {
+ private static void clusterDataMR(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold)
+ throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
conf.set(STATE_IN_KEY, stateIn.toString());
conf.set(EMIT_MOST_LIKELY_KEY, Boolean.toString(emitMostLikely));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Mon Aug 30 18:41:46 2010
@@ -83,17 +83,17 @@ public class KMeansDriver extends Abstra
int maxIterations,
int numReduceTasks,
boolean runClustering,
- boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
- InstantiationException, IllegalAccessException {
- new KMeansDriver().job(input,
- clustersIn,
- output,
- measure,
- convergenceDelta,
- maxIterations,
- numReduceTasks,
- runClustering,
- runSequential);
+ boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+ job(input,
+ clustersIn,
+ output,
+ measure,
+ convergenceDelta,
+ maxIterations,
+ numReduceTasks,
+ runClustering,
+ runSequential);
}
@Override
@@ -141,8 +141,17 @@ public class KMeansDriver extends Abstra
.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
}
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
- boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
- job(input, clusters, output, measure, convergenceDelta, maxIterations, numReduceTasks, runClustering, runSequential);
+ boolean runSequential =
+ getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
+ job(input,
+ clusters,
+ output,
+ measure,
+ convergenceDelta,
+ maxIterations,
+ numReduceTasks,
+ runClustering,
+ runSequential);
return 0;
}
@@ -167,24 +176,24 @@ public class KMeansDriver extends Abstra
* true if points are to be clustered after iterations are completed
* @param runSequential if true execute sequential algorithm
*/
- public void job(Path input,
- Path clustersIn,
- Path output,
- DistanceMeasure measure,
- double convergenceDelta,
- int maxIterations,
- int numReduceTasks,
- boolean runClustering,
- boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
- IllegalAccessException {
+ public static void job(Path input,
+ Path clustersIn,
+ Path output,
+ DistanceMeasure measure,
+ double convergenceDelta,
+ int maxIterations,
+ int numReduceTasks,
+ boolean runClustering,
+ boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
// iterate until the clusters converge
String delta = Double.toString(convergenceDelta);
if (log.isInfoEnabled()) {
- log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[] { input, clustersIn, output,
- measure.getClass().getName() });
- log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", new Object[] { convergenceDelta,
- maxIterations, numReduceTasks, VectorWritable.class.getName() });
+ log.info("Input: {} Clusters In: {} Out: {} Distance: {}",
+ new Object[] { input, clustersIn, output, measure.getClass().getName() });
+ log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}",
+ new Object[] { convergenceDelta, maxIterations, numReduceTasks, VectorWritable.class.getName() });
}
Path clustersOut = buildClusters(input, clustersIn, output, measure, maxIterations, numReduceTasks, delta, runSequential);
if (runClustering) {
@@ -213,29 +222,30 @@ public class KMeansDriver extends Abstra
* the convergence delta value
* @return the Path of the final clusters directory
*/
- public Path buildClusters(Path input,
- Path clustersIn,
- Path output,
- DistanceMeasure measure,
- int maxIterations,
- int numReduceTasks,
- String delta,
- boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
- InstantiationException, IllegalAccessException {
+ public static Path buildClusters(Path input,
+ Path clustersIn,
+ Path output,
+ DistanceMeasure measure,
+ int maxIterations,
+ int numReduceTasks,
+ String delta,
+ boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
if (runSequential) {
- return buildClustersSeq(input, clustersIn, output, measure, maxIterations, numReduceTasks, delta);
+ return buildClustersSeq(input, clustersIn, output, measure, maxIterations, delta);
} else {
return buildClustersMR(input, clustersIn, output, measure, maxIterations, numReduceTasks, delta);
}
}
- private Path buildClustersSeq(Path input,
- Path clustersIn,
- Path output,
- DistanceMeasure measure,
- int maxIterations,
- int numReduceTasks,
- String delta) throws InstantiationException, IllegalAccessException, IOException {
+ private static Path buildClustersSeq(Path input,
+ Path clustersIn,
+ Path output,
+ DistanceMeasure measure,
+ int maxIterations,
+ String delta)
+ throws InstantiationException, IllegalAccessException, IOException {
+
KMeansClusterer clusterer = new KMeansClusterer(measure);
List<Cluster> clusters = new ArrayList<Cluster>();
@@ -253,7 +263,7 @@ public class KMeansDriver extends Abstra
for (FileStatus s : status) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
try {
- WritableComparable<?> key = (WritableComparable<?>) reader.getKeyClass().newInstance();
+ Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance();
while (reader.next(key, vw)) {
clusterer.addPointToNearestCluster(vw.get(), clusters);
@@ -286,13 +296,15 @@ public class KMeansDriver extends Abstra
return clustersIn;
}
- private Path buildClustersMR(Path input,
- Path clustersIn,
- Path output,
- DistanceMeasure measure,
- int maxIterations,
- int numReduceTasks,
- String delta) throws IOException, InterruptedException, ClassNotFoundException {
+ private static Path buildClustersMR(Path input,
+ Path clustersIn,
+ Path output,
+ DistanceMeasure measure,
+ int maxIterations,
+ int numReduceTasks,
+ String delta)
+ throws IOException, InterruptedException, ClassNotFoundException {
+
boolean converged = false;
int iteration = 1;
while (!converged && (iteration <= maxIterations)) {
@@ -324,12 +336,14 @@ public class KMeansDriver extends Abstra
* the number of reducer tasks
* @return true if the iteration successfully runs
*/
- private boolean runIteration(Path input,
- Path clustersIn,
- Path clustersOut,
- String measureClass,
- String convergenceDelta,
- int numReduceTasks) throws IOException, InterruptedException, ClassNotFoundException {
+ private static boolean runIteration(Path input,
+ Path clustersIn,
+ Path clustersOut,
+ String measureClass,
+ String convergenceDelta,
+ int numReduceTasks)
+ throws IOException, InterruptedException, ClassNotFoundException {
+
Configuration conf = new Configuration();
conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measureClass);
@@ -373,7 +387,7 @@ public class KMeansDriver extends Abstra
* @throws IOException
* if there was an IO error
*/
- private boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
+ private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
FileStatus[] parts = fs.listStatus(filePath);
for (FileStatus part : parts) {
String name = part.getPath().getName();
@@ -416,27 +430,32 @@ public class KMeansDriver extends Abstra
* the convergence delta value
* @param runSequential if true execute sequential algorithm
*/
- public void clusterData(Path input,
- Path clustersIn,
- Path output,
- DistanceMeasure measure,
- String convergenceDelta,
- boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
- InstantiationException, IllegalAccessException {
+ public static void clusterData(Path input,
+ Path clustersIn,
+ Path output,
+ DistanceMeasure measure,
+ String convergenceDelta,
+ boolean runSequential)
+ throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+
if (log.isInfoEnabled()) {
log.info("Running Clustering");
log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[] { input, clustersIn, output, measure });
log.info("convergence: {} Input Vectors: {}", convergenceDelta, VectorWritable.class.getName());
}
if (runSequential) {
- clusterDataSeq(input, clustersIn, output, measure, convergenceDelta);
+ clusterDataSeq(input, clustersIn, output, measure);
} else {
clusterDataMR(input, clustersIn, output, measure, convergenceDelta);
}
}
- private void clusterDataSeq(Path input, Path clustersIn, Path output, DistanceMeasure measure, String convergenceDelta)
- throws IOException, InterruptedException, InstantiationException, IllegalAccessException {
+ private static void clusterDataSeq(Path input,
+ Path clustersIn,
+ Path output,
+ DistanceMeasure measure)
+ throws IOException, InterruptedException, InstantiationException, IllegalAccessException {
+
KMeansClusterer clusterer = new KMeansClusterer(measure);
List<Cluster> clusters = new ArrayList<Cluster>();
KMeansUtil.configureWithClusterInfo(clustersIn, clusters);
@@ -469,8 +488,13 @@ public class KMeansDriver extends Abstra
}
- private void clusterDataMR(Path input, Path clustersIn, Path output, DistanceMeasure measure, String convergenceDelta)
- throws IOException, InterruptedException, ClassNotFoundException {
+ private static void clusterDataMR(Path input,
+ Path clustersIn,
+ Path output,
+ DistanceMeasure measure,
+ String convergenceDelta)
+ throws IOException, InterruptedException, ClassNotFoundException {
+
Configuration conf = new Configuration();
conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Mon Aug 30 18:41:46 2010
@@ -36,7 +36,12 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.slf4j.Logger;
@@ -68,10 +73,6 @@ import org.slf4j.LoggerFactory;
* <p>Note that because of how Hadoop parses arguments, all "-D" arguments must appear before all other
* arguments.</p>
*/
-/**
- * @author jeff
- *
- */
public abstract class AbstractJob extends Configured implements Tool {
private static final Logger log = LoggerFactory.getLogger(AbstractJob.class);
@@ -321,7 +322,7 @@ public abstract class AbstractJob extend
* specified or outputOption is present and neither <code>--output</code>
* nor <code>-Dmapred.output.dir</code> are specified.
*/
- protected void parseDirectories(CommandLine cmdLine) throws IllegalArgumentException {
+ protected void parseDirectories(CommandLine cmdLine) {
Configuration conf = getConf();
@@ -340,13 +341,13 @@ public abstract class AbstractJob extend
}
if (inputOption != null && inputPath == null) {
- throw new IllegalArgumentException("No input specified: " + inputOption.getPreferredName() + " or -Dmapred.input.dir "
- + "must be provided to specify input directory");
+ throw new IllegalArgumentException("No input specified: " + inputOption.getPreferredName()
+ + " or -Dmapred.input.dir must be provided to specify input directory");
}
if (outputOption != null && outputPath == null) {
- throw new IllegalArgumentException("No output specified: " + outputOption.getPreferredName() + " or -Dmapred.output.dir "
- + "must be provided to specify output directory");
+ throw new IllegalArgumentException("No output specified: " + outputOption.getPreferredName()
+ + " or -Dmapred.output.dir must be provided to specify output directory");
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java Mon Aug 30 18:41:46 2010
@@ -262,9 +262,11 @@ public final class DataLoader {
* @param values
* used to convert CATEGORICAL attributes to Integer
*/
+ /*
private static Data constructData(Attribute[] attrs, List<Instance> vectors, List<String>[] values) {
Dataset dataset = new Dataset(attrs, values, vectors.size());
return new Data(dataset, vectors);
}
+ */
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java Mon Aug 30 18:41:46 2010
@@ -19,7 +19,6 @@ package org.apache.mahout.df.data;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java Mon Aug 30 18:41:46 2010
@@ -34,17 +34,14 @@ public class ParallelFPGrowthCombiner ex
Reducer<LongWritable,TransactionTree,LongWritable,TransactionTree> {
@Override
- protected void reduce(LongWritable key, Iterable<TransactionTree> values, Context context) throws IOException,
- InterruptedException {
+ protected void reduce(LongWritable key, Iterable<TransactionTree> values, Context context)
+ throws IOException, InterruptedException {
TransactionTree cTree = new TransactionTree();
- int count = 0;
- int node = 0;
for (TransactionTree tr : values) {
Iterator<Pair<List<Integer>,Long>> it = tr.getIterator();
while (it.hasNext()) {
Pair<List<Integer>,Long> p = it.next();
- node += cTree.addPattern(p.getFirst(), p.getSecond());
- count++;
+ cTree.addPattern(p.getFirst(), p.getSecond());
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java Mon Aug 30 18:41:46 2010
@@ -77,10 +77,8 @@ public abstract class MahoutTestCase ext
Path testTempDirPath = getTestTempDirPath();
Path tempFileOrDir = fs.makeQualified(new Path(testTempDirPath, name));
fs.deleteOnExit(tempFileOrDir);
- if (dir) {
- if (!fs.mkdirs(tempFileOrDir)) {
- throw new IOException("Could not create " + tempFileOrDir);
- }
+ if (dir && !fs.mkdirs(tempFileOrDir)) {
+ throw new IOException("Could not create " + tempFileOrDir);
}
return tempFileOrDir;
}
@@ -103,7 +101,7 @@ public abstract class MahoutTestCase ext
/**
* find a declared field in a class or one of its super classes
*/
- private Field findDeclaredField(Class<?> inClass, String fieldname) throws NoSuchFieldException {
+ private static Field findDeclaredField(Class<?> inClass, String fieldname) throws NoSuchFieldException {
if (Object.class.equals(inClass)) {
throw new NoSuchFieldException();
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java Mon Aug 30 18:41:46 2010
@@ -30,7 +30,7 @@ public class StringUtilsTest extends Mah
if (this == obj) {
return true;
}
- if (obj == null || !(obj instanceof DummyTest)) {
+ if (!(obj instanceof DummyTest)) {
return false;
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java Mon Aug 30 18:41:46 2010
@@ -46,7 +46,7 @@ public class PartialBuilderTest extends
private static final int NUM_TREES = 32;
/** instances per partition */
- private static final int numInstances = 20;
+ private static final int NUM_INSTANCES = 20;
public void testProcessOutput() throws Exception {
Configuration conf = new Configuration();
@@ -130,13 +130,13 @@ public class PartialBuilderTest extends
Node tree = new Leaf(rng.nextInt(100));
keys[index] = new TreeID(partition, treeId);
- values[index] = new MapredOutput(tree, nextIntArray(rng, numInstances));
+ values[index] = new MapredOutput(tree, nextIntArray(rng, NUM_INSTANCES));
index++;
}
firstIds[p] = firstId;
- firstId += numInstances;
+ firstId += NUM_INSTANCES;
}
}
@@ -210,13 +210,13 @@ public class PartialBuilderTest extends
@Override
public void prediction(int treeId, int instanceId, int prediction) {
- int partition = instanceId / numInstances;
+ int partition = instanceId / NUM_INSTANCES;
TreeID key = new TreeID(partition, treeId);
int index = ArrayUtils.indexOf(keys, key);
assertTrue("key not found", index >= 0);
- assertEquals(values[index].getPredictions()[instanceId % numInstances],
+ assertEquals(values[index].getPredictions()[instanceId % NUM_INSTANCES],
prediction);
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyCandidate.java Mon Aug 30 18:41:46 2010
@@ -43,7 +43,7 @@ public class DummyCandidate {
if (this == obj) {
return true;
}
- if (obj == null || !(obj instanceof DummyCandidate)) {
+ if (!(obj instanceof DummyCandidate)) {
return false;
}
Modified: mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml (original)
+++ mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml Mon Aug 30 18:41:46 2010
@@ -24,7 +24,7 @@
<rule ref="rulesets/basic.xml/BooleanInstantiation"/>
<rule ref="rulesets/basic.xml/CollapsibleIfStatements"/>
<rule ref="rulesets/basic.xml/DoubleCheckedLocking"/>
- <rule ref="rulesets/basic.xml/EmptyCatchBlock"/>
+ <!--<rule ref="rulesets/basic.xml/EmptyCatchBlock"/>-->
<rule ref="rulesets/basic.xml/EmptyFinallyBlock"/>
<rule ref="rulesets/basic.xml/EmptyIfStmt"/>
<rule ref="rulesets/basic.xml/EmptyStatementNotInLoop"/>
@@ -52,7 +52,7 @@
<!--<rule ref="rulesets/clone.xml/CloneThrowsCloneNotSupportedException"/>-->
<!--<rule ref="rulesets/clone.xml/ProperCloneImplementation"/>-->
- <rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>
+ <!--<rule ref="rulesets/codesize.xml/CyclomaticComplexity"/>-->
<rule ref="rulesets/codesize.xml/ExcessiveClassLength"/>
<rule ref="rulesets/codesize.xml/ExcessiveMethodLength"/>
<rule ref="rulesets/codesize.xml/ExcessiveParameterList"/>
@@ -78,7 +78,7 @@
<!--<rule ref="rulesets/design.xml/AbstractClassWithoutAbstractMethod"/>-->
<!--<rule ref="rulesets/design.xml/AccessorClassGeneration"/>-->
<!--<rule ref="rulesets/design.xml/AssignmentToNonFinalStatic"/>-->
- <rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>
+ <!--<rule ref="rulesets/design.xml/AvoidDeeplyNestedIfStmts"/>-->
<!--<rule ref="rulesets/design.xml/AvoidInstanceofChecksInCatchClause"/>-->
<rule ref="rulesets/design.xml/AvoidProtectedFieldInFinalClass"/>
<!--<rule ref="rulesets/design.xml/AvoidReassigningParameters"/>-->
@@ -153,8 +153,8 @@
<!--<rule ref="rulesets/naming.xml/MethodWithSameNameAsEnclosingClass"/>-->
<!--<rule ref="rulesets/naming.xml/SuspiciousHashcodeMethodName"/>-->
<!--<rule ref="rulesets/naming.xml/SuspiciousConstantFieldName"/>-->
- <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>
- <rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>
+ <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingTypeName"/>-->
+ <!--<rule ref="rulesets/naming.xml/AvoidFieldNameMatchingMethodName"/>-->
<!-- <rule ref="rulesets/naming.xml/AvoidNonConstructorMethodsWithClassName"/> -->
<rule ref="rulesets/naming.xml/NoPackage"/>
<rule ref="rulesets/naming.xml/PackageCase"/>
@@ -163,7 +163,7 @@
<!--<rule ref="rulesets/optimizations.xml/MethodArgumentCouldBeFinal"/>-->
<!--<rule ref="rulesets/optimizations.xml/AvoidInstantiatingObjectsInLoops"/>-->
<!--<rule ref="rulesets/optimizations.xml/UseArrayListInsteadOfVector"/>-->
- <rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>
+ <!--<rule ref="rulesets/optimizations.xml/SimplifyStartsWith"/>-->
<rule ref="rulesets/optimizations.xml/UseStringBufferForStringAppends"/>
<!--<rule ref="rulesets/strictexception.xml/AvoidCatchingThrowable"/>-->
@@ -173,7 +173,7 @@
<!--<rule ref="rulesets/strictexception.xml/AvoidThrowingRawExceptionTypes"/>-->
<!--<rule ref="rulesets/strictexception.xml/AvoidThrowingNullPointerException"/>-->
- <rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>
+ <!--<rule ref="rulesets/strings.xml/AvoidDuplicateLiterals"/>-->
<rule ref="rulesets/strings.xml/StringInstantiation"/>
<rule ref="rulesets/strings.xml/StringToString"/>
<!-- <rule ref="rulesets/strings.xml/AvoidConcatenatingNonLiteralsInStringBuffer"/>-->
Modified: mahout/trunk/etc/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/etc/findbugs-exclude.xml?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/etc/findbugs-exclude.xml (original)
+++ mahout/trunk/etc/findbugs-exclude.xml Mon Aug 30 18:41:46 2010
@@ -10,18 +10,21 @@
<Bug pattern="SE_NO_SERIALVERSIONID"/>
</Match>
<Match>
+ <Bug pattern="EI_EXPOSE_REP"/>
+ </Match>
+ <Match>
<Bug pattern="EI_EXPOSE_REP2"/>
- </Match>
- <Match>
+ </Match>
+ <Match>
<Bug pattern="SIC_INNER_SHOULD_BE_STATIC_ANON"/>
- </Match>
- <Match>
+ </Match>
+ <Match>
<Bug pattern="SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING"/>
</Match>
- <Match>
+ <Match>
<Bug pattern="SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE"/>
</Match>
- <Match>
+ <Match>
<Bug pattern="SE_BAD_FIELD"/>
</Match>
<Match>
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java Mon Aug 30 18:41:46 2010
@@ -62,7 +62,6 @@ public final class WikipediaDatasetCreat
* <li>The output {@link org.apache.hadoop.fs.Path} where to write the classifier as a
* {@link org.apache.hadoop.io.SequenceFile}</li>
* </ol>
- * @throws InterruptedException
*/
public static void main(String[] args) throws IOException, InterruptedException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
@@ -144,14 +143,13 @@ public final class WikipediaDatasetCreat
* @param exactMatchOnly
* if true, then the Wikipedia category must match exactly instead of simply containing the
* category string
- * @throws ClassNotFoundException
- * @throws InterruptedException
*/
public static void runJob(String input,
String output,
String catFile,
boolean exactMatchOnly,
- Class<? extends Analyzer> analyzerClass) throws IOException, InterruptedException, ClassNotFoundException {
+ Class<? extends Analyzer> analyzerClass)
+ throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
conf.set("key.value.separator.in.input.line", " ");
conf.set("xmlinput.start", "<text xml:space=\"preserve\">");
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/LogisticModelParameters.java Mon Aug 30 18:41:46 2010
@@ -18,11 +18,26 @@
package org.apache.mahout.classifier.sgd;
import com.google.common.collect.Maps;
-import com.google.gson.*;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.InstanceCreator;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
+import com.google.gson.JsonSerializationContext;
+import com.google.gson.JsonSerializer;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
-import java.io.*;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.Writer;
import java.lang.reflect.Type;
import java.util.Iterator;
import java.util.List;
@@ -39,14 +54,11 @@ public class LogisticModelParameters {
private int numFeatures;
private boolean useBias;
private int maxTargetCategories;
- private List<String> targetCategories = null;
+ private List<String> targetCategories;
private double lambda;
private double learningRate;
- private transient CsvRecordFactory csv = null;
- private OnlineLogisticRegression lr = null;
-
- public LogisticModelParameters() {
- }
+ private transient CsvRecordFactory csv;
+ private OnlineLogisticRegression lr;
/**
* Returns a CsvRecordFactory compatible with this logistic model. The reason that this is tied
@@ -78,12 +90,14 @@ public class LogisticModelParameters {
lr = new OnlineLogisticRegression(getMaxTargetCategories(), getNumFeatures(), new L1())
.lambda(getLambda())
.learningRate(getLearningRate())
- .alpha(1 - 1e-3);
+ .alpha(1 - 1.0e-3);
}
return lr;
}
- public static void saveModel(Writer out, OnlineLogisticRegression model, List<String> targetCategories) throws IOException {
+ public static void saveModel(Writer out,
+ OnlineLogisticRegression model,
+ List<String> targetCategories) throws IOException {
LogisticModelParameters x = new LogisticModelParameters();
x.setTargetCategories(targetCategories);
x.setLambda(model.getLambda());
@@ -134,7 +148,7 @@ public class LogisticModelParameters {
* @throws IOException If there is an error opening or closing the file.
*/
public static LogisticModelParameters loadFrom(File in) throws IOException {
- FileReader input = new FileReader(in);
+ InputStreamReader input = new FileReader(in);
LogisticModelParameters r = loadFrom(input);
input.close();
return r;
@@ -147,17 +161,17 @@ public class LogisticModelParameters {
* @param predictorList The list of variable names.
* @param typeList The list of types in the format preferred by CsvRecordFactory.
*/
- public void setTypeMap(List predictorList, List typeList) {
+ public void setTypeMap(List<String> predictorList, List<String> typeList) {
typeMap = Maps.newHashMap();
- if (typeList.size() == 0) {
+ if (typeList.isEmpty()) {
throw new IllegalArgumentException("Must have at least one type specifier");
}
- Iterator iTypes = typeList.iterator();
+ Iterator<String> iTypes = typeList.iterator();
String lastType = null;
for (Object x : predictorList) {
// type list can be short .. we just repeat last spec
if (iTypes.hasNext()) {
- lastType = iTypes.next().toString();
+ lastType = iTypes.next();
}
typeMap.put(x.toString(), lastType);
}
@@ -234,7 +248,8 @@ public class LogisticModelParameters {
* Tells GSON how to (de)serialize a Mahout matrix. We assume on deserialization that
* the matrix is dense.
*/
- public static class MatrixTypeAdapter implements JsonDeserializer<Matrix>, JsonSerializer<Matrix>, InstanceCreator<Matrix> {
+ public static class MatrixTypeAdapter
+ implements JsonDeserializer<Matrix>, JsonSerializer<Matrix>, InstanceCreator<Matrix> {
@Override
public JsonElement serialize(Matrix m, Type type, JsonSerializationContext jsonSerializationContext) {
JsonObject r = new JsonObject();
@@ -253,7 +268,7 @@ public class LogisticModelParameters {
}
@Override
- public Matrix deserialize(JsonElement x, Type type, JsonDeserializationContext jsonDeserializationContext) throws JsonParseException {
+ public Matrix deserialize(JsonElement x, Type type, JsonDeserializationContext jsonDeserializationContext) {
JsonObject data = x.getAsJsonObject();
Matrix r = new DenseMatrix(data.get("rows").getAsInt(), data.get("cols").getAsInt());
int i = 0;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java Mon Aug 30 18:41:46 2010
@@ -29,23 +29,21 @@ import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.classifier.evaluation.Auc;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
-/**
- *
- */
public class RunLogistic {
- private static final Logger log = LoggerFactory.getLogger(RunLogistic.class);
+
private static String inputFile;
private static String modelFile;
- private static boolean showAuc = false;
- private static boolean showScores = false;
- private static boolean showConfusion = false;
+ private static boolean showAuc;
+ private static boolean showScores;
+ private static boolean showConfusion;
+
+ private RunLogistic() {
+ }
public static void main(String[] args) throws IOException {
if (parseArgs(args)) {
@@ -82,15 +80,17 @@ public class RunLogistic {
}
if (showConfusion) {
Matrix m = collector.confusion();
- System.out.printf("confusion: [[%.1f, %.1f], [%.1f, %.1f]]\n", m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
+ System.out.printf("confusion: [[%.1f, %.1f], [%.1f, %.1f]]\n",
+ m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
m = collector.entropy();
- System.out.printf("entropy: [[%.1f, %.1f], [%.1f, %.1f]]\n", m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
+ System.out.printf("entropy: [[%.1f, %.1f], [%.1f, %.1f]]\n",
+ m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
}
}
}
private static boolean parseArgs(String[] args) {
- DefaultOptionBuilder builder = new DefaultOptionBuilder();
+ DefaultOptionBuilder builder = new DefaultOptionBuilder();
Option help = builder.withLongName("help").withDescription("print this list").create();
@@ -102,13 +102,13 @@ public class RunLogistic {
Option scores = builder.withLongName("scores").withDescription("print scores").create();
ArgumentBuilder argumentBuilder = new ArgumentBuilder();
- Option inputFile = builder.withLongName("input")
+ Option inputFileOption = builder.withLongName("input")
.withRequired(true)
.withArgument(argumentBuilder.withName("input").withMaximum(1).create())
.withDescription("where to get training data")
.create();
- Option modelFile = builder.withLongName("model")
+ Option modelFileOption = builder.withLongName("model")
.withRequired(true)
.withArgument(argumentBuilder.withName("model").withMaximum(1).create())
.withDescription("where to get a model")
@@ -120,8 +120,8 @@ public class RunLogistic {
.withOption(auc)
.withOption(scores)
.withOption(confusion)
- .withOption(inputFile)
- .withOption(modelFile)
+ .withOption(inputFileOption)
+ .withOption(modelFileOption)
.create();
Parser parser = new Parser();
@@ -129,18 +129,17 @@ public class RunLogistic {
parser.setHelpTrigger("--help");
parser.setGroup(normalArgs);
parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
- CommandLine cmdLine;
- cmdLine = parser.parseAndHelp(args);
+ CommandLine cmdLine = parser.parseAndHelp(args);
if (cmdLine == null) {
return false;
}
- RunLogistic.inputFile = getStringArgument(cmdLine, inputFile);
- RunLogistic.modelFile = getStringArgument(cmdLine, modelFile);
- RunLogistic.showAuc = getBooleanArgument(cmdLine, auc);
- RunLogistic.showScores = getBooleanArgument(cmdLine, scores);
- RunLogistic.showConfusion = getBooleanArgument(cmdLine, confusion);
+ inputFile = getStringArgument(cmdLine, inputFileOption);
+ modelFile = getStringArgument(cmdLine, modelFileOption);
+ showAuc = getBooleanArgument(cmdLine, auc);
+ showScores = getBooleanArgument(cmdLine, scores);
+ showConfusion = getBooleanArgument(cmdLine, confusion);
return true;
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java Mon Aug 30 18:41:46 2010
@@ -226,8 +226,7 @@ public class TrainLogistic {
parser.setHelpTrigger("--help");
parser.setGroup(normalArgs);
parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
- CommandLine cmdLine;
- cmdLine = parser.parseAndHelp(args);
+ CommandLine cmdLine = parser.parseAndHelp(args);
if (cmdLine == null) {
return false;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java Mon Aug 30 18:41:46 2010
@@ -30,6 +30,7 @@ import java.awt.geom.Ellipse2D;
import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
@@ -58,14 +59,15 @@ public class DisplayClustering extends F
protected static final int SIZE = 8; // screen size in inches
- private static final List<Vector> SAMPLE_PARAMS = new ArrayList<Vector>();
+ private static final Collection<Vector> SAMPLE_PARAMS = new ArrayList<Vector>();
protected static final List<VectorWritable> SAMPLE_DATA = new ArrayList<VectorWritable>();
protected static final List<List<Cluster>> CLUSTERS = new ArrayList<List<Cluster>>();
- protected static final Color[] COLORS = { Color.red, Color.orange, Color.yellow, Color.green, Color.blue, Color.magenta,
- Color.lightGray };
+ protected static final Color[] COLORS = {
+ Color.red, Color.orange, Color.yellow, Color.green, Color.blue, Color.magenta, Color.lightGray
+ };
protected static final double T1 = 3.0;
@@ -166,7 +168,7 @@ public class DisplayClustering extends F
* a Vector of rectangle dimensions
*/
protected static void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
- double[] flip = { 1, -1 };
+ double[] flip = {1, -1};
Vector v2 = v.times(new DenseVector(flip));
v2 = v2.minus(dv.divide(2));
int h = SIZE / 2;
@@ -186,7 +188,7 @@ public class DisplayClustering extends F
* a Vector of ellipse dimensions
*/
protected static void plotEllipse(Graphics2D g2, Vector v, Vector dv) {
- double[] flip = { 1, -1 };
+ double[] flip = {1, -1};
Vector v2 = v.times(new DenseVector(flip));
v2 = v2.minus(dv.divide(2));
int h = SIZE / 2;
@@ -220,7 +222,7 @@ public class DisplayClustering extends F
* double standard deviation of the samples
*/
protected static void generateSamples(int num, double mx, double my, double sd) {
- double[] params = { mx, my, sd, sd };
+ double[] params = {mx, my, sd, sd};
SAMPLE_PARAMS.add(new DenseVector(params));
log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] { num, mx, my, sd });
for (int i = 0; i < num; i++) {
@@ -242,7 +244,8 @@ public class DisplayClustering extends F
}
}
- protected static List<Cluster> readClusters(Path clustersIn) throws IOException, InstantiationException, IllegalAccessException {
+ protected static List<Cluster> readClusters(Path clustersIn)
+ throws IOException, InstantiationException, IllegalAccessException {
List<Cluster> clusters = new ArrayList<Cluster>();
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
@@ -254,8 +257,12 @@ public class DisplayClustering extends F
Writable value = (Writable) reader.getValueClass().newInstance();
while (reader.next(key, value)) {
Cluster cluster = (Cluster) value;
- log.info("Reading Cluster:" + cluster.getId() + " center:" + AbstractCluster.formatVector(cluster.getCenter(), null)
- + " numPoints:" + cluster.getNumPoints() + " radius:" + AbstractCluster.formatVector(cluster.getRadius(), null));
+ log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", new Object[] {
+ cluster.getId(),
+ AbstractCluster.formatVector(cluster.getCenter(), null),
+ cluster.getNumPoints(),
+ AbstractCluster.formatVector(cluster.getRadius(), null)
+ });
clusters.add(cluster);
value = (Writable) reader.getValueClass().newInstance();
}
@@ -291,7 +298,7 @@ public class DisplayClustering extends F
* double y-value standard deviation of the samples
*/
protected static void generate2dSamples(int num, double mx, double my, double sdx, double sdy) {
- double[] params = { mx, my, sdx, sdy };
+ double[] params = {mx, my, sdx, sdy};
SAMPLE_PARAMS.add(new DenseVector(params));
log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", new Object[] { num, mx, my, sdx, sdy });
for (int i = 0; i < num; i++) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java Mon Aug 30 18:41:46 2010
@@ -53,8 +53,7 @@ public final class Job extends FuzzyKMea
log.info("Running with default arguments");
Path output = new Path("output");
HadoopUtil.overwriteOutput(output);
- new Job().job(new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55, 10, 1, (float) 2, 0.5, true);
-
+ job(new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55, 10, 1, (float) 2, 0.5);
}
}
@@ -66,7 +65,8 @@ public final class Job extends FuzzyKMea
addOption(DefaultOptionCreator.distanceMeasureOption().create());
addOption(DefaultOptionCreator.clustersInOption()
.withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
- + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first")
+ + "If k is also specified, then a random set of vectors will be selected"
+ + " and written out to this path first")
.create());
addOption(DefaultOptionCreator.numClustersOption()
.withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
@@ -112,7 +112,7 @@ public final class Job extends FuzzyKMea
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
- job(input, output, measure, t1, t2, maxIterations, numReduceTasks, fuzziness, convergenceDelta, runClustering);
+ job(input, output, measure, t1, t2, maxIterations, numReduceTasks, fuzziness, convergenceDelta);
return 0;
}
@@ -128,7 +128,7 @@ public final class Job extends FuzzyKMea
* the String denoting the input directory path
* @param output
* the String denoting the output directory path
- * @param measureClass
+ * @param t1
* the canopy T1 threshold
* @param t2
* the canopy T2 threshold
@@ -140,20 +140,17 @@ public final class Job extends FuzzyKMea
* the float "m" fuzziness coefficient
* @param convergenceDelta
* the double convergence criteria for iterations
- * @param runClustering
- * the int maximum number of iterations
*/
- private void job(Path input,
- Path output,
- DistanceMeasure measure,
- double t1,
- double t2,
- int maxIterations,
- int numReducerTasks,
- float fuzziness,
- double convergenceDelta,
- boolean runClustering) throws IOException, InstantiationException, IllegalAccessException, InterruptedException,
- ClassNotFoundException {
+ private static void job(Path input,
+ Path output,
+ DistanceMeasure measure,
+ double t1,
+ double t2,
+ int maxIterations,
+ int numReducerTasks,
+ float fuzziness,
+ double convergenceDelta)
+ throws IOException, InstantiationException, IllegalAccessException, InterruptedException, ClassNotFoundException {
Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT);
log.info("Preparing Input");
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java Mon Aug 30 18:41:46 2010
@@ -210,7 +210,7 @@ public class TestForest extends Configur
if (dataFS.getFileStatus(dataPath).isDir()) {
//the input is a directory of files
- testDirectory(dataPath, outputPath, converter, forest, dataset, analyzer, rng);
+ testDirectory(outputPath, converter, forest, dataset, analyzer, rng);
} else {
// the input is one single file
testFile(dataPath, outputPath, converter, forest, dataset, analyzer, rng);
@@ -224,7 +224,7 @@ public class TestForest extends Configur
}
}
- private void testDirectory(Path inPath, Path outPath, DataConverter converter, DecisionForest forest, Dataset dataset,
+ private void testDirectory(Path outPath, DataConverter converter, DecisionForest forest, Dataset dataset,
ResultAnalyzer analyzer, Random rng) throws IOException {
Path[] infiles = DFUtils.listOutputFiles(dataFS, dataPath);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java?rev=990892&r1=990891&r2=990892&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitness.java Mon Aug 30 18:41:46 2010
@@ -86,7 +86,7 @@ public class CDFitness implements Writab
if (this == obj) {
return true;
}
- if (obj == null || !(obj instanceof CDFitness)) {
+ if (!(obj instanceof CDFitness)) {
return false;
}