You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/06/20 14:07:58 UTC
svn commit: r1352052 [3/7] - in /mahout/trunk: ./ buildtools/
buildtools/src/main/resources/ core/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/
core/src/main/java/org/apache/mahout/cf/t...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java Wed Jun 20 12:07:50 2012
@@ -44,7 +44,6 @@ import org.apache.mahout.common.iterator
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.common.mapreduce.VectorSumReducer;
import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -64,7 +63,8 @@ import java.util.List;
* <dl>
* <dt>{@code --input path}</dt>
* <dd>Input path for {@code SequenceFile<IntWritable, VectorWritable>} document vectors. See
- * {@link SparseVectorsFromSequenceFiles} for details on how to generate this input format.</dd>
+ * {@link org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles}
+ * for details on how to generate this input format.</dd>
* <dt>{@code --dictionary path}</dt>
* <dd>Path to dictionary file(s) generated during construction of input document vectors (glob
* expression supported). If set, this data is scanned to determine an appropriate value for option
@@ -224,7 +224,7 @@ public class CVB0Driver extends Abstract
int maxItersPerDoc,
int numReduceTasks,
boolean backfillPerplexity)
- throws ClassNotFoundException, IOException, InterruptedException {
+ throws ClassNotFoundException, IOException, InterruptedException {
// verify arguments
Preconditions.checkArgument(testFraction >= 0.0 && testFraction <= 1.0,
"Expected 'testFraction' value in range [0, 1] but found value '%s'", testFraction);
@@ -314,7 +314,7 @@ public class CVB0Driver extends Abstract
}
}
log.info("Completed {} iterations in {} seconds", iterationNumber,
- (System.currentTimeMillis() - startTime)/1000);
+ (System.currentTimeMillis() - startTime) / 1000);
log.info("Perplexities: ({})", Joiner.on(", ").join(perplexities));
// write final topic-term and doc-topic distributions
@@ -343,8 +343,7 @@ public class CVB0Driver extends Abstract
}
private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
- throws IOException,
- ClassNotFoundException, InterruptedException {
+ throws IOException, ClassNotFoundException, InterruptedException {
String jobName = "Calculating perplexity for " + modelPath;
log.info("About to run: " + jobName);
Job job = new Job(conf, jobName);
@@ -402,7 +401,7 @@ public class CVB0Driver extends Abstract
* @throws IOException
*/
public static double readPerplexity(Configuration conf, Path topicModelStateTemp, int iteration)
- throws IOException {
+ throws IOException {
Path perplexityPath = perplexityPath(topicModelStateTemp, iteration);
FileSystem fs = FileSystem.get(perplexityPath.toUri(), conf);
if (!fs.exists(perplexityPath)) {
@@ -423,10 +422,9 @@ public class CVB0Driver extends Abstract
return perplexity / modelWeight;
}
- private static Job writeTopicModel(Configuration conf, Path modelInput, Path output) throws IOException,
- InterruptedException, ClassNotFoundException {
- String jobName = String.format("Writing final topic/term distributions from %s to %s", modelInput,
- output);
+ private static Job writeTopicModel(Configuration conf, Path modelInput, Path output)
+ throws IOException, InterruptedException, ClassNotFoundException {
+ String jobName = String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
log.info("About to run: " + jobName);
Job job = new Job(conf, jobName);
job.setJarByClass(CVB0Driver.class);
@@ -443,9 +441,8 @@ public class CVB0Driver extends Abstract
}
private static Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
- throws IOException, ClassNotFoundException, InterruptedException {
- String jobName = String.format("Writing final document/topic inference from %s to %s", corpus,
- output);
+ throws IOException, ClassNotFoundException, InterruptedException {
+ String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
log.info("About to run: " + jobName);
Job job = new Job(conf, jobName);
job.setMapperClass(CVB0DocInferenceMapper.class);
@@ -483,7 +480,7 @@ public class CVB0Driver extends Abstract
}
private static int getCurrentIterationNumber(Configuration config, Path modelTempDir, int maxIterations)
- throws IOException {
+ throws IOException {
FileSystem fs = FileSystem.get(modelTempDir.toUri(), config);
int iterationNumber = 1;
Path iterationPath = modelPath(modelTempDir, iterationNumber);
@@ -496,7 +493,8 @@ public class CVB0Driver extends Abstract
}
public static void runIteration(Configuration conf, Path corpusInput, Path modelInput, Path modelOutput,
- int iterationNumber, int maxIterations, int numReduceTasks) throws IOException, ClassNotFoundException, InterruptedException {
+ int iterationNumber, int maxIterations, int numReduceTasks)
+ throws IOException, ClassNotFoundException, InterruptedException {
String jobName = String.format("Iteration %d of %d, input path: %s",
iterationNumber, maxIterations, modelInput);
log.info("About to run: " + jobName);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java Wed Jun 20 12:07:50 2012
@@ -96,7 +96,7 @@ public class CachingCVB0PerplexityMapper
@Override
public void map(IntWritable docId, VectorWritable document, Context context)
throws IOException, InterruptedException{
- if (1 > testFraction && random.nextFloat() >= testFraction) {
+ if (testFraction < 1.0f && random.nextFloat() >= testFraction) {
return;
}
context.getCounter(Counters.SAMPLED_DOCUMENTS).increment(1);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java Wed Jun 20 12:07:50 2012
@@ -71,19 +71,14 @@ public class InMemoryCollapsedVariationa
private int numDocuments;
private double alpha;
private double eta;
- private int minDfCt;
- private double maxDfPct;
+ //private int minDfCt;
+ //private double maxDfPct;
private boolean verbose = false;
-
- private Map<String, Integer> termIdMap;
private String[] terms; // of length numTerms;
private Matrix corpusWeights; // length numDocs;
private double totalCorpusWeight;
private double initialModelCorpusFraction;
private Matrix docTopicCounts;
- private long seed;
- private TopicModel topicModel;
- private TopicModel updatedModel;
private int numTrainingThreads;
private int numUpdatingThreads;
private ModelTrainer modelTrainer;
@@ -96,26 +91,34 @@ public class InMemoryCollapsedVariationa
this.verbose = verbose;
}
- public InMemoryCollapsedVariationalBayes0(Matrix corpus, String[] terms, int numTopics,
- double alpha, double eta) {
- this(corpus, terms, numTopics, alpha, eta, 1, 1, 0, 1234);
+ public InMemoryCollapsedVariationalBayes0(Matrix corpus,
+ String[] terms,
+ int numTopics,
+ double alpha,
+ double eta) {
+ this(corpus, terms, numTopics, alpha, eta, 1, 1, 0);
}
- public InMemoryCollapsedVariationalBayes0(Matrix corpus, String[] terms, int numTopics,
- double alpha, double eta, int numTrainingThreads, int numUpdatingThreads,
- double modelCorpusFraction, long seed) {
- this.seed = seed;
+ public InMemoryCollapsedVariationalBayes0(Matrix corpus,
+ String[] terms,
+ int numTopics,
+ double alpha,
+ double eta,
+ int numTrainingThreads,
+ int numUpdatingThreads,
+ double modelCorpusFraction) {
+ //this.seed = seed;
this.numTopics = numTopics;
this.alpha = alpha;
this.eta = eta;
- this.minDfCt = 0;
- this.maxDfPct = 1.0f;
+ //this.minDfCt = 0;
+ //this.maxDfPct = 1.0f;
corpusWeights = corpus;
numDocuments = corpus.numRows();
this.terms = terms;
this.initialModelCorpusFraction = modelCorpusFraction;
numTerms = terms != null ? terms.length : corpus.numCols();
- termIdMap = Maps.newHashMap();
+ Map<String, Integer> termIdMap = Maps.newHashMap();
if (terms != null) {
for (int t=0; t<terms.length; t++) {
termIdMap.put(terms[t], t);
@@ -143,12 +146,12 @@ public class InMemoryCollapsedVariationa
}
private void initializeModel() {
- topicModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(), terms,
- numUpdatingThreads,
- initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
+ TopicModel topicModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(), terms,
+ numUpdatingThreads,
+ initialModelCorpusFraction == 0 ? 1 : initialModelCorpusFraction * totalCorpusWeight);
topicModel.setConf(getConf());
- updatedModel = initialModelCorpusFraction == 0
+ TopicModel updatedModel = initialModelCorpusFraction == 0
? new TopicModel(numTopics, numTerms, eta, alpha, null, terms, numUpdatingThreads, 1)
: topicModel;
updatedModel.setConf(getConf());
@@ -157,6 +160,7 @@ public class InMemoryCollapsedVariationa
modelTrainer = new ModelTrainer(topicModel, updatedModel, numTrainingThreads, numTopics, numTerms);
}
+ /*
private void inferDocuments(double convergence, int maxIter, boolean recalculate) {
for (int docId = 0; docId < corpusWeights.numRows() ; docId++) {
Vector inferredDocument = topicModel.infer(corpusWeights.viewRow(docId),
@@ -164,6 +168,7 @@ public class InMemoryCollapsedVariationa
// do what now?
}
}
+ */
public void trainDocuments() {
trainDocuments(0);
@@ -372,7 +377,7 @@ public class InMemoryCollapsedVariationa
int numUpdateThreads = Integer.parseInt((String)cmdLine.getValue(numUpdateThreadsOpt));
String topicOutFile = (String)cmdLine.getValue(outputTopicFileOpt);
String docOutFile = (String)cmdLine.getValue(outputDocFileOpt);
- String reInferDocTopics = (String)cmdLine.getValue(reInferDocTopicsOpt);
+ //String reInferDocTopics = (String)cmdLine.getValue(reInferDocTopicsOpt);
boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
double modelCorpusFraction = (Double) cmdLine.getValue(modelCorpusFractionOption);
@@ -390,7 +395,7 @@ public class InMemoryCollapsedVariationa
start = System.nanoTime();
InMemoryCollapsedVariationalBayes0 cvb0 =
new InMemoryCollapsedVariationalBayes0(corpus, terms, numTopics, alpha, eta,
- numTrainThreads, numUpdateThreads, modelCorpusFraction, 1234);
+ numTrainThreads, numUpdateThreads, modelCorpusFraction);
logTime("cvb0 init", System.nanoTime() - start);
start = System.nanoTime();
@@ -398,11 +403,13 @@ public class InMemoryCollapsedVariationa
cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
logTime("total training time", System.nanoTime() - start);
+ /*
if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
cvb0.inferDocuments(0.0, 100, true);
} else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
cvb0.inferDocuments(0.0, 100, false);
}
+ */
start = System.nanoTime();
cvb0.writeModel(new Path(topicOutFile));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java Wed Jun 20 12:07:50 2012
@@ -480,7 +480,8 @@ public class TopicModel implements Confi
}
}
- @Override public void run() {
+ @Override
+ public void run() {
while (!shutdown) {
try {
Pair<Integer, Vector> pair = queue.poll(1, TimeUnit.SECONDS);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Wed Jun 20 12:07:50 2012
@@ -130,7 +130,7 @@ public class EigencutsDriver extends Abs
// eigendecomposition (step 3)
int overshoot = (int) ((double) eigenrank * OVERSHOOT_MULTIPLIER);
LanczosState state = new LanczosState(L, eigenrank,
- new DistributedLanczosSolver().getInitialVector(L));
+ DistributedLanczosSolver.getInitialVector(L));
DistributedRowMatrix U = performEigenDecomposition(conf, L, state, eigenrank, overshoot, outputCalc);
U.setConf(new Configuration(conf));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java Wed Jun 20 12:07:50 2012
@@ -54,8 +54,7 @@ public class SpectralKMeansDriver extend
}
@Override
- public int run(String[] arg0)
- throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
+ public int run(String[] arg0) throws IOException, ClassNotFoundException, InterruptedException {
// set up command line options
Configuration conf = getConf();
addInputOption();
@@ -143,7 +142,7 @@ public class SpectralKMeansDriver extend
// unnecessary vectors later
int overshoot = (int) ((double) clusters * OVERSHOOT_MULTIPLIER);
DistributedLanczosSolver solver = new DistributedLanczosSolver();
- LanczosState state = new LanczosState(L, clusters, solver.getInitialVector(L));
+ LanczosState state = new LanczosState(L, clusters, DistributedLanczosSolver.getInitialVector(L));
Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors-" + (System.nanoTime() & 0xFF));
solver.runJob(conf,
state,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java Wed Jun 20 12:07:50 2012
@@ -91,10 +91,9 @@ public final class ClusterOutputPostProc
* Creates the directory to put post processed clusters.
*/
private void createPostProcessDirectory() throws IOException {
- if (!fileSystem.exists(clusterPostProcessorOutput)) {
- if (!fileSystem.mkdirs(clusterPostProcessorOutput)) {
- throw new IOException("Error creating cluster post processor directory");
- }
+ if (!fileSystem.exists(clusterPostProcessorOutput) &&
+ !fileSystem.mkdirs(clusterPostProcessorOutput)) {
+ throw new IOException("Error creating cluster post processor directory");
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Wed Jun 20 12:07:50 2012
@@ -232,8 +232,6 @@ public abstract class AbstractJob extend
this.outputOption = addOption(DefaultOptionCreator.outputOption().create());
}
-
-
/** Build an option with the given parameters. Name and description are
* required.
*
@@ -246,21 +244,21 @@ public abstract class AbstractJob extend
* @return the option.
*/
protected static Option buildOption(String name,
- String shortName,
- String description,
- boolean hasArg,
- boolean required,
- String defaultValue) {
+ String shortName,
+ String description,
+ boolean hasArg,
+ boolean required,
+ String defaultValue) {
return buildOption(name, shortName, description, hasArg, 1, 1, required, defaultValue);
}
protected static Option buildOption(String name,
- String shortName,
- String description,
- boolean hasArg, int min, int max,
- boolean required,
- String defaultValue) {
+ String shortName,
+ String description,
+ boolean hasArg, int min, int max,
+ boolean required,
+ String defaultValue) {
DefaultOptionBuilder optBuilder = new DefaultOptionBuilder().withLongName(name).withDescription(description)
.withRequired(required);
@@ -281,10 +279,8 @@ public abstract class AbstractJob extend
return optBuilder.create();
}
- //convenience method
/**
- *
* @param name The name of the option
* @return the {@link org.apache.commons.cli2.Option} with the name, else null
*/
@@ -311,7 +307,6 @@ public abstract class AbstractJob extend
* names used for keys are the option name parameter prefixed by '--'.
*
* @see #parseArguments(String[], boolean, boolean) -- passes in false, false for the optional args.
- *
*/
public Map<String, List<String>> parseArguments(String[] args) throws IOException {
return parseArguments(args, false, false);
@@ -323,9 +318,9 @@ public abstract class AbstractJob extend
* @param inputOptional if true, then the input option, even if set, need not be present. If false and input is an option and there is no input, then throw an error
* @param outputOptional if true, then the output option, even if set, need not be present. If false and output is an option and there is no output, then throw an error
* @return the args parsed into a map.
- * @throws IOException
*/
- public Map<String, List<String>> parseArguments(String[] args, boolean inputOptional, boolean outputOptional) throws IOException{
+ public Map<String, List<String>> parseArguments(String[] args, boolean inputOptional, boolean outputOptional)
+ throws IOException {
Option helpOpt = addOption(DefaultOptionCreator.helpOption());
addOption("tempDir", null, "Intermediate output directory", "temp");
addOption("startPhase", null, "First phase to run", "0");
@@ -388,7 +383,7 @@ public abstract class AbstractJob extend
*/
public String getOption(String optionName) {
List<String> list = argMap.get(keyFor(optionName));
- if (list != null && list.isEmpty() == false) {
+ if (list != null && !list.isEmpty()) {
return list.get(0);
}
return null;
@@ -411,7 +406,8 @@ public abstract class AbstractJob extend
/**
* Options can occur multiple times, so return the list
* @param optionName The unadorned (no "--" prefixing it) option name
- * @return The values, else null. If the option is present, but has no values, then the result will be an empty list (Collections.emptyList())
+ * @return The values, else null. If the option is present, but has no values, then the result will be an
+ * empty list (Collections.emptyList())
*/
public List<String> getOptions(String optionName) {
return argMap.get(keyFor(optionName));
@@ -431,13 +427,14 @@ public abstract class AbstractJob extend
* @param matrix
* @return the cardinality of the vector
*/
- public int getDimensions(Path matrix) throws IOException, InstantiationException, IllegalAccessException {
+ public int getDimensions(Path matrix) throws IOException {
SequenceFile.Reader reader = null;
try {
reader = new SequenceFile.Reader(FileSystem.get(getConf()), matrix, getConf());
- Writable row = (Writable) reader.getKeyClass().newInstance();
+ Writable row = ClassUtils.instantiateAs(reader.getKeyClass().asSubclass(Writable.class), Writable.class);
+
VectorWritable vectorWritable = new VectorWritable();
Preconditions.checkArgument(reader.getValueClass().equals(VectorWritable.class),
@@ -453,7 +450,8 @@ public abstract class AbstractJob extend
}
}
- /** Obtain input and output directories from command-line options or hadoop
+ /**
+ * Obtain input and output directories from command-line options or hadoop
* properties. If {@code addInputOption} or {@code addOutputOption}
* has been called, this method will throw an {@code OptionException} if
* no source (command-line or property) for that value is present.
@@ -461,7 +459,6 @@ public abstract class AbstractJob extend
* non-null only if specified as a hadoop property. Command-line options
* take precedence over hadoop properties.
*
- * @param cmdLine
* @throws IllegalArgumentException if either inputOption is present,
* and neither {@code --input} nor {@code -Dmapred.input.dir} are
* specified or outputOption is present and neither {@code --output}
@@ -487,9 +484,9 @@ public abstract class AbstractJob extend
this.outputPath = new Path(conf.get("mapred.output.dir"));
}
- Preconditions.checkArgument(inputOptional == true || inputOption == null || inputPath != null,
+ Preconditions.checkArgument(inputOptional || inputOption == null || inputPath != null,
"No input specified or -Dmapred.input.dir must be provided to specify input directory");
- Preconditions.checkArgument(outputOptional == true || outputOption == null || outputPath != null,
+ Preconditions.checkArgument(outputOptional || outputOption == null || outputPath != null,
"No output specified: or -Dmapred.output.dir must be provided to specify output directory");
}
@@ -498,11 +495,12 @@ public abstract class AbstractJob extend
// the option appeared on the command-line, or it has a value
// (which is likely a default value).
- if (cmdLine.hasOption(o) || cmdLine.getValue(o) != null || (cmdLine.getValues(o) != null && cmdLine.getValues(o).isEmpty() == false)) {
+ if (cmdLine.hasOption(o) || cmdLine.getValue(o) != null ||
+ (cmdLine.getValues(o) != null && !cmdLine.getValues(o).isEmpty())) {
// nulls are ok, for cases where options are simple flags.
- List vo = cmdLine.getValues(o);
- if (vo != null && vo.isEmpty() == false) {
+ List<?> vo = cmdLine.getValues(o);
+ if (vo != null && !vo.isEmpty()) {
List<String> vals = new ArrayList<String>();
for (Object o1 : vo) {
vals.add(o1.toString());
@@ -523,7 +521,7 @@ public abstract class AbstractJob extend
*/
public static String getOption(Map<String, List<String>> args, String optName) {
List<String> res = args.get(optName);
- if (res != null && res.isEmpty() == false) {
+ if (res != null && !res.isEmpty()) {
return res.get(0);
}
return null;
@@ -581,8 +579,8 @@ public abstract class AbstractJob extend
}
/**
- * necessary to make this job (having a combined input path) work on Amazon S3, hopefully this is obsolete when MultipleInputs is available
- * again
+ * necessary to make this job (having a combined input path) work on Amazon S3, hopefully this is
+ * obsolete when MultipleInputs is available again
*/
public static void setS3SafeCombinedInputPath(Job job, Path referencePath, Path inputPathOne, Path inputPathTwo)
throws IOException {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java Wed Jun 20 12:07:50 2012
@@ -18,8 +18,10 @@
package org.apache.mahout.common;
import java.io.IOException;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
+import com.google.common.base.Charsets;
import org.apache.commons.cli.Options;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.OptionException;
@@ -51,7 +53,7 @@ public final class CommandLineUtil {
fmt.printHelp("<command> [Generic Options] [Job-Specific Options]",
"Generic Options:", ops, "");
- PrintWriter pw = new PrintWriter(System.out, true);
+ PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true);
HelpFormatter formatter = new HelpFormatter();
formatter.setGroup(group);
formatter.setPrintWriter(pw);
@@ -69,7 +71,7 @@ public final class CommandLineUtil {
fmt.printHelp("<command> [Generic Options] [Job-Specific Options]",
"Generic Options:", ops, "");
- PrintWriter pw = new PrintWriter(System.out, true);
+ PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true);
HelpFormatter formatter = new HelpFormatter();
formatter.setGroup(group);
formatter.setPrintWriter(pw);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java Wed Jun 20 12:07:50 2012
@@ -29,7 +29,8 @@ public final class StringUtils {
private static final XStream XSTREAM = new XStream();
private static final Pattern NEWLINE_PATTERN = Pattern.compile("\n");
-
+ private static final Pattern XMLRESERVED = Pattern.compile("\"|\\&|\\<|\\>|\'");
+
private StringUtils() {
// do nothing
}
@@ -56,7 +57,7 @@ public final class StringUtils {
return (T) XSTREAM.fromXML(str);
}
- public static String escapeXML(String input) {
- return input.replaceAll("\"|\\&|\\<|\\>|\'", "_");
+ public static String escapeXML(CharSequence input) {
+ return XMLRESERVED.matcher(input).replaceAll("_");
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java Wed Jun 20 12:07:50 2012
@@ -147,8 +147,6 @@ public class MahalanobisDistanceMeasure
if (v1.size() != v2.size()) {
throw new CardinalityException(v1.size(), v2.size());
}
- if (inverseCovarianceMatrix== null)
- System.out.println();
return Math.sqrt(v1.minus(v2).dot(Algebra.mult(inverseCovarianceMatrix, v1.minus(v2))));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Wed Jun 20 12:07:50 2012
@@ -200,7 +200,7 @@ public final class MahoutDriver {
}
private static boolean isDeprecated(Properties mainClasses, String keyString) {
- return shortName(mainClasses.getProperty(keyString)).equalsIgnoreCase("deprecated");
+ return "deprecated".equalsIgnoreCase(shortName(mainClasses.getProperty(keyString)));
}
private static Properties loadProperties(String resource) throws IOException {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java Wed Jun 20 12:07:50 2012
@@ -50,8 +50,6 @@ import org.apache.mahout.common.iterator
import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
import org.apache.mahout.math.list.IntArrayList;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
*
@@ -95,7 +93,7 @@ public final class PFPGrowth {
throw new IOException("Cannot read Frequency list from Distributed Cache");
}
if (files.length != 1) {
- throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ")");
+ throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
}
FileSystem fs = FileSystem.getLocal(conf);
Path fListLocalPath = fs.makeQualified(files[0]);
@@ -106,7 +104,7 @@ public final class PFPGrowth {
throw new IOException("Cannot read Frequency list from Distributed Cache");
}
if (filesURIs.length != 1) {
- throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ")");
+ throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
}
fListLocalPath = new Path(filesURIs[0].getPath());
}
@@ -187,8 +185,9 @@ public final class PFPGrowth {
IntArrayList ret = new IntArrayList();
int start = groupId * maxPerGroup;
int end = start + maxPerGroup;
- if (end > numFeatures)
+ if (end > numFeatures) {
end = numFeatures;
+ }
for (int i = start; i < end; i++) {
ret.add(i);
}
@@ -234,13 +233,12 @@ public final class PFPGrowth {
saveFList(fList, params, conf);
// set param to control group size in MR jobs
- int numGroups = params.getInt(PFPGrowth.NUM_GROUPS,
- PFPGrowth.NUM_GROUPS_DEFAULT);
+ int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
int maxPerGroup = fList.size() / numGroups;
- if (fList.size() % numGroups != 0)
+ if (fList.size() % numGroups != 0) {
maxPerGroup++;
+ }
params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));
- fList = null;
startParallelFPGrowth(params, conf);
startAggregating(params, conf);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java Wed Jun 20 12:07:50 2012
@@ -41,8 +41,7 @@ public class ParallelFPGrowthMapper exte
private final OpenObjectIntHashMap<String> fMap = new OpenObjectIntHashMap<String>();
private Pattern splitter;
private int maxPerGroup;
-
- private IntWritable wGroupID = new IntWritable();
+ private final IntWritable wGroupID = new IntWritable();
@Override
protected void map(LongWritable offset, Text input, Context context)
@@ -95,6 +94,6 @@ public class ParallelFPGrowthMapper exte
splitter = Pattern.compile(params.get(PFPGrowth.SPLIT_PATTERN,
PFPGrowth.SPLITTER.toString()));
- maxPerGroup = Integer.valueOf(params.getInt(PFPGrowth.MAX_PER_GROUP, 0));
+ maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java Wed Jun 20 12:07:50 2012
@@ -36,6 +36,7 @@ import org.apache.mahout.fpm.pfpgrowth.c
import org.apache.mahout.fpm.pfpgrowth.convertors.integer.IntegerStringOutputConverter;
import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
+import org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthIds;
import org.apache.mahout.math.list.IntArrayList;
import org.apache.mahout.math.list.LongArrayList;
@@ -44,22 +45,18 @@ import org.apache.mahout.math.list.LongA
* outputs the the Top K frequent Patterns for each group.
*
*/
-public class ParallelFPGrowthReducer extends Reducer<IntWritable,TransactionTree,Text,TopKStringPatterns> {
+public final class ParallelFPGrowthReducer extends Reducer<IntWritable,TransactionTree,Text,TopKStringPatterns> {
private final List<String> featureReverseMap = Lists.newArrayList();
private final LongArrayList freqList = new LongArrayList();
-
private int maxHeapSize = 50;
-
private int minSupport = 3;
-
private int numFeatures;
private int maxPerGroup;
-
private boolean useFP2;
private static class IteratorAdapter implements Iterator<Pair<List<Integer>,Long>> {
- private Iterator<Pair<IntArrayList,Long>> innerIter;
+ private final Iterator<Pair<IntArrayList,Long>> innerIter;
private IteratorAdapter(Iterator<Pair<IntArrayList,Long>> transactionIter) {
innerIter = transactionIter;
@@ -73,7 +70,7 @@ public class ParallelFPGrowthReducer ext
@Override
public Pair<List<Integer>,Long> next() {
Pair<IntArrayList,Long> innerNext = innerIter.next();
- return new Pair(innerNext.getFirst().toList(), innerNext.getSecond());
+ return new Pair<List<Integer>,Long>(innerNext.getFirst().toList(), innerNext.getSecond());
}
@Override
@@ -99,18 +96,16 @@ public class ParallelFPGrowthReducer ext
Collections.sort(localFList, new CountDescendingPairComparator<Integer,Long>());
if (useFP2) {
- org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthIds fpGrowth =
- new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthIds();
- fpGrowth.generateTopKFrequentPatterns(
+ FPGrowthIds.generateTopKFrequentPatterns(
cTree.iterator(),
freqList,
minSupport,
maxHeapSize,
PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures),
new IntegerStringOutputConverter(
- new ContextWriteOutputCollector<IntWritable,TransactionTree,Text,TopKStringPatterns>(context),
+ new ContextWriteOutputCollector<IntWritable, TransactionTree, Text, TopKStringPatterns>(context),
featureReverseMap),
- new ContextStatusUpdater<IntWritable,TransactionTree,Text,TopKStringPatterns>(context));
+ new ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns>(context));
} else {
FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>();
fpGrowth.generateTopKFrequentPatterns(
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java Wed Jun 20 12:07:50 2012
@@ -42,10 +42,10 @@ public class TransactionIterator<T> exte
new Function<Pair<List<T>,Long>, Pair<int[],Long>>() {
@Override
public Pair<int[],Long> apply(Pair<List<T>,Long> from) {
- int index = 0;
- if (from == null) {
+ if (from == null) {
return null;
}
+ int index = 0;
for (T attribute : from.getFirst()) {
if (attributeIdMapping.containsKey(attribute)) {
transactionBuffer[index++] = attributeIdMapping.get(attribute);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java Wed Jun 20 12:07:50 2012
@@ -163,7 +163,8 @@ public final class FrequentPatternMaxHea
return true;
}
- public String toString() {
+ @Override
+ public String toString() {
StringBuilder sb = new StringBuilder("FreqPatHeap{");
String sep = "";
for (Pattern p : getHeap()) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthIds.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthIds.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthIds.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthIds.java Wed Jun 20 12:07:50 2012
@@ -48,10 +48,13 @@ import org.apache.mahout.fpm.pfpgrowth.
/**
* Implementation of PFGrowth Algorithm
*/
-public class FPGrowthIds {
+public final class FPGrowthIds {
private static final Logger log = LoggerFactory.getLogger(FPGrowthIds.class);
+ private FPGrowthIds() {
+ }
+
public static List<Pair<String,TopKStringPatterns>> readFrequentPattern(Configuration conf, Path path) {
List<Pair<String,TopKStringPatterns>> ret = Lists.newArrayList();
// key is feature value is count
@@ -68,7 +71,7 @@ public class FPGrowthIds {
*
* @param transactionStream
* Iterator of transaction
- * @param frequencyList
+ * @param attributeFrequency
* list of frequent features and their support value
* @param minSupport
* minimum support of the transactions
@@ -83,13 +86,13 @@ public class FPGrowthIds {
* written
* @throws IOException
*/
- public final void generateTopKFrequentPatterns(Iterator<Pair<IntArrayList,Long>> transactionStream,
- LongArrayList attributeFrequency,
- long minSupport,
- int k,
- IntArrayList returnableFeatures,
- OutputCollector<Integer,List<Pair<List<Integer>,Long>>> output,
- StatusUpdater updater) throws IOException {
+ public static void generateTopKFrequentPatterns(Iterator<Pair<IntArrayList, Long>> transactionStream,
+ LongArrayList attributeFrequency,
+ long minSupport,
+ int k,
+ IntArrayList returnableFeatures,
+ OutputCollector<Integer, List<Pair<List<Integer>, Long>>> output,
+ StatusUpdater updater) throws IOException {
for (int i = 0; i < attributeFrequency.size(); i++) {
if (attributeFrequency.get(i) < minSupport) {
@@ -116,10 +119,12 @@ public class FPGrowthIds {
private static class IdentityMapping extends AbstractMap<Integer, Integer> {
+ @Override
public Set<Map.Entry<Integer,Integer>> entrySet() {
throw new IllegalStateException();
}
+ @Override
public Integer get(Object key) {
return (Integer) key;
}
@@ -142,12 +147,12 @@ public class FPGrowthIds {
* integer to A
* @return Top K Frequent Patterns for each feature and their support
*/
- private Map<Integer,FrequentPatternMaxHeap> fpGrowth(FPTree tree,
- long minSupportValue,
- int k,
- IntArrayList requiredFeatures,
- TopKPatternsOutputConverter<Integer> outputCollector,
- StatusUpdater updater) throws IOException {
+ private static Map<Integer,FrequentPatternMaxHeap> fpGrowth(FPTree tree,
+ long minSupportValue,
+ int k,
+ IntArrayList requiredFeatures,
+ TopKPatternsOutputConverter<Integer> outputCollector,
+ StatusUpdater updater) throws IOException {
Map<Integer,FrequentPatternMaxHeap> patterns = Maps.newHashMap();
requiredFeatures.sort();
@@ -182,8 +187,6 @@ public class FPGrowthIds {
* minimum support of the pattern to be mined
* @param k
* Max value of the Size of the Max-Heap in which Patterns are held
- * @param featureSetSize
- * number of features
* @param returnFeatures
* the id's of the features for which Top K patterns have to be mined
* @param topKPatternsOutputCollector
@@ -191,19 +194,18 @@ public class FPGrowthIds {
* format to the corresponding A Format
* @return Top K frequent patterns for each attribute
*/
- private Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns(
- Iterator<Pair<IntArrayList,Long>> transactions,
- LongArrayList attributeFrequency,
- long minSupport,
- int k,
- IntArrayList returnFeatures,
- TopKPatternsOutputConverter<Integer> topKPatternsOutputCollector,
- StatusUpdater updater) throws IOException {
+ private static Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns(
+ Iterator<Pair<IntArrayList, Long>> transactions,
+ LongArrayList attributeFrequency,
+ long minSupport,
+ int k,
+ IntArrayList returnFeatures,
+ TopKPatternsOutputConverter<Integer> topKPatternsOutputCollector,
+ StatusUpdater updater) throws IOException {
FPTree tree = new FPTree(attributeFrequency, minSupport);
// Constructing initial FPTree from the list of transactions
- int nodecount = 0;
int i = 0;
while (transactions.hasNext()) {
Pair<IntArrayList,Long> transaction = transactions.next();
@@ -215,8 +217,6 @@ public class FPGrowthIds {
}
}
- log.info("Number of Nodes in the FP Tree: {}", nodecount);
-
return fpGrowth(tree, minSupport, k, returnFeatures, topKPatternsOutputCollector, updater);
}
@@ -282,10 +282,12 @@ public class FPGrowthIds {
int[] qints = q.getPattern();
Pattern pq = new Pattern();
- for (int pi = 0; pi < p.length(); pi++)
+ for (int pi = 0; pi < p.length(); pi++) {
pq.add(pints[pi], p.support());
- for (int qi = 0; qi < q.length(); qi++)
+ }
+ for (int qi = 0; qi < q.length(); qi++) {
pq.add(qints[qi], q.support());
+ }
pats.insert(pq);
}
}
@@ -293,8 +295,9 @@ public class FPGrowthIds {
for (Pattern q : qPats.getHeap()) {
Pattern qq = new Pattern();
int[] qints = q.getPattern();
- for (int qi = 0; qi < q.length(); qi++)
+ for (int qi = 0; qi < q.length(); qi++) {
qq.add(qints[qi], q.support());
+ }
pats.insert(qq);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthObj.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthObj.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthObj.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthObj.java Wed Jun 20 12:07:50 2012
@@ -220,8 +220,6 @@ public class FPGrowthObj<A extends Compa
return patterns;
}
-
-
/**
* Internal TopKFrequentPattern Generation algorithm, which represents the A's
* as integers and transforms features to use only integers
@@ -234,8 +232,6 @@ public class FPGrowthObj<A extends Compa
* minimum support of the pattern to be mined
* @param k
* Max value of the Size of the Max-Heap in which Patterns are held
- * @param featureSetSize
- * number of features
* @param returnFeatures
* the id's of the features for which Top K patterns have to be mined
* @param topKPatternsOutputCollector
@@ -248,21 +244,20 @@ public class FPGrowthObj<A extends Compa
long[] attributeFrequency,
long minSupport,
int k,
- //int featureSetSize,
Collection<Integer> returnFeatures, TopKPatternsOutputConverter<A> topKPatternsOutputCollector,
StatusUpdater updater) throws IOException {
FPTree tree = new FPTree(attributeFrequency, minSupport);
// Constructing initial FPTree from the list of transactions
- int nodecount = 0;
int i = 0;
while (transactions.hasNext()) {
Pair<int[],Long> transaction = transactions.next();
List<Integer> iLst = Lists.newArrayList();
int[] iArr = transaction.getFirst();
- for (int j = 0; j < iArr.length; j++)
- iLst.add(iArr[j]);
+ for (int anIArr : iArr) {
+ iLst.add(anIArr);
+ }
tree.accumulate(iLst, transaction.getSecond());
i++;
if (i % 10000 == 0) {
@@ -270,8 +265,6 @@ public class FPGrowthObj<A extends Compa
}
}
- log.info("Number of Nodes in the FP Tree: {}", nodecount);
-
return fpGrowth(tree, minSupport, k, returnFeatures, topKPatternsOutputCollector, updater);
}
@@ -337,10 +330,12 @@ public class FPGrowthObj<A extends Compa
int[] qints = q.getPattern();
Pattern pq = new Pattern();
- for (int pi = 0; pi < p.length(); pi++)
+ for (int pi = 0; pi < p.length(); pi++) {
pq.add(pints[pi], p.support());
- for (int qi = 0; qi < q.length(); qi++)
+ }
+ for (int qi = 0; qi < q.length(); qi++) {
pq.add(qints[qi], q.support());
+ }
pats.insert(pq);
}
}
@@ -348,8 +343,9 @@ public class FPGrowthObj<A extends Compa
for (Pattern q : qPats.getHeap()) {
Pattern qq = new Pattern();
int[] qints = q.getPattern();
- for (int qi = 0; qi < q.length(); qi++)
+ for (int qi = 0; qi < q.length(); qi++) {
qq.add(qints[qi], q.support());
+ }
pats.insert(qq);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java Wed Jun 20 12:07:50 2012
@@ -33,22 +33,22 @@ import org.apache.mahout.math.map.OpenIn
public class FPTree {
private final AttrComparator attrComparator = new AttrComparator();
- private FPNode root;
- private long minSupport;
- private LongArrayList attrCountList;
- private OpenIntObjectHashMap attrNodeLists;
+ private final FPNode root;
+ private final long minSupport;
+ private final LongArrayList attrCountList;
+ private final OpenIntObjectHashMap<List<FPNode>> attrNodeLists;
public static final class FPNode {
- private FPNode parent;
- private OpenIntObjectHashMap childMap;
- private int attribute;
+ private final FPNode parent;
+ private final OpenIntObjectHashMap<FPNode> childMap;
+ private final int attribute;
private long count;
private FPNode(FPNode parent, int attribute, long count) {
this.parent = parent;
this.attribute = attribute;
this.count = count;
- this.childMap = new OpenIntObjectHashMap();
+ this.childMap = new OpenIntObjectHashMap<FPNode>();
}
private void addChild(FPNode child) {
@@ -68,7 +68,7 @@ public class FPTree {
}
public FPNode child(int attribute) {
- return (FPNode) childMap.get(attribute);
+ return childMap.get(attribute);
}
public int attribute() {
@@ -76,7 +76,7 @@ public class FPTree {
}
public void accumulate(long incr) {
- count = count + incr;
+ count += incr;
}
public long count() {
@@ -94,7 +94,7 @@ public class FPTree {
public FPTree(LongArrayList attrCountList, long minSupport) {
this.root = new FPNode(null, -1, 0);
this.attrCountList = attrCountList;
- this.attrNodeLists = new OpenIntObjectHashMap();
+ this.attrNodeLists = new OpenIntObjectHashMap<List<FPNode>>();
this.minSupport = minSupport;
}
@@ -107,14 +107,15 @@ public class FPTree {
public FPTree(long[] attrCounts, long minSupport) {
this.root = new FPNode(null, -1, 0);
this.attrCountList = new LongArrayList();
- for (int i = 0; i < attrCounts.length; i++)
+ for (int i = 0; i < attrCounts.length; i++) {
if (attrCounts[i] > 0) {
if (attrCountList.size() < (i + 1)) {
attrCountList.setSize(i + 1);
}
attrCountList.set(i, attrCounts[i]);
}
- this.attrNodeLists = new OpenIntObjectHashMap();
+ }
+ this.attrNodeLists = new OpenIntObjectHashMap<List<FPNode>>();
this.minSupport = minSupport;
}
@@ -145,13 +146,14 @@ public class FPTree {
Collections.sort(items, attrComparator);
FPNode currNode = root;
- for (int i = 0; i < items.size(); i++) {
- int item = items.get(i);
+ for (Integer item : items) {
long attrCount = 0;
- if (item < attrCountList.size())
+ if (item < attrCountList.size()) {
attrCount = attrCountList.get(item);
- if (attrCount < minSupport)
+ }
+ if (attrCount < minSupport) {
continue;
+ }
FPNode next = currNode.child(item);
if (next == null) {
@@ -179,17 +181,17 @@ public class FPTree {
Collections.sort(items, attrComparator);
FPNode currNode = root;
- for (int i = 0; i < items.size(); i++) {
- int item = items.get(i);
+ for (Integer item : items) {
long attrCount = attrCountList.get(item);
- if (attrCount < minSupport)
+ if (attrCount < minSupport) {
continue;
+ }
FPNode next = currNode.child(item);
if (next == null) {
next = new FPNode(currNode, item, count);
currNode.addChild(next);
- List<FPNode> nodeList = (List<FPNode>) attrNodeLists.get(item);
+ List<FPNode> nodeList = attrNodeLists.get(item);
if (nodeList == null) {
nodeList = Lists.newArrayList();
attrNodeLists.put(item, nodeList);
@@ -210,8 +212,9 @@ public class FPTree {
public Iterable<Integer> attrIterable() {
List<Integer> attrs = Lists.newArrayList();
for (int i = 0; i < attrCountList.size(); i++) {
- if (attrCountList.get(i) > 0)
+ if (attrCountList.get(i) > 0) {
attrs.add(i);
+ }
}
Collections.sort(attrs, attrComparator);
return attrs;
@@ -224,8 +227,9 @@ public class FPTree {
public Iterable<Integer> attrIterableRev() {
List<Integer> attrs = Lists.newArrayList();
for (int i = 0; i < attrCountList.size(); i++) {
- if (attrCountList.get(i) > 0)
+ if (attrCountList.get(i) > 0) {
attrs.add(i);
+ }
}
Collections.sort(attrs, Collections.reverseOrder(attrComparator));
return attrs;
@@ -237,7 +241,7 @@ public class FPTree {
*/
public FPTree createMoreFreqConditionalTree(int targetAttr) {
LongArrayList counts = new LongArrayList();
- List<FPNode> nodeList = (List<FPNode>) attrNodeLists.get(targetAttr);
+ List<FPNode> nodeList = attrNodeLists.get(targetAttr);
for (FPNode currNode : nodeList) {
long pathCount = currNode.count();
@@ -251,21 +255,23 @@ public class FPTree {
currNode = currNode.parent();
}
}
- if (counts.get(targetAttr) != attrCountList.get(targetAttr))
+ if (counts.get(targetAttr) != attrCountList.get(targetAttr)) {
throw new IllegalStateException("mismatched counts for targetAttr="
- + targetAttr + ", (" + counts.get(targetAttr)
- + " != " + attrCountList.get(targetAttr) + "); "
- + "thisTree=" + this + "\n");
+ + targetAttr + ", (" + counts.get(targetAttr)
+ + " != " + attrCountList.get(targetAttr) + "); "
+ + "thisTree=" + this + '\n');
+ }
counts.set(targetAttr, 0L);
FPTree toRet = new FPTree(counts, minSupport);
IntArrayList attrLst = new IntArrayList();
- for (FPNode currNode : (List<FPNode>) attrNodeLists.get(targetAttr)) {
+ for (FPNode currNode : attrNodeLists.get(targetAttr)) {
long count = currNode.count();
attrLst.clear();
while (currNode != root) {
- if (currNode.count() < count)
+ if (currNode.count() < count) {
throw new IllegalStateException();
+ }
attrLst.add(currNode.attribute());
currNode = currNode.parent();
}
@@ -277,16 +283,20 @@ public class FPTree {
// biggest count or smallest attr number goes first
private class AttrComparator implements Comparator<Integer> {
+ @Override
public int compare(Integer a, Integer b) {
long aCnt = 0;
- if (a < attrCountList.size())
+ if (a < attrCountList.size()) {
aCnt = attrCountList.get(a);
+ }
long bCnt = 0;
- if (b < attrCountList.size())
+ if (b < attrCountList.size()) {
bCnt = attrCountList.get(b);
- if (aCnt == bCnt)
+ }
+ if (aCnt == bCnt) {
return a - b;
+ }
return (bCnt - aCnt) < 0 ? -1 : 1;
}
}
@@ -305,8 +315,9 @@ public class FPTree {
FPNode currNode = root;
while (currNode.numChildren() == 1) {
currNode = currNode.children().iterator().next();
- if (pAttrCountList.size() <= currNode.attribute())
+ if (pAttrCountList.size() <= currNode.attribute()) {
pAttrCountList.setSize(currNode.attribute() + 1);
+ }
pAttrCountList.set(currNode.attribute(), currNode.count());
qAttrCountList.set(currNode.attribute(), 0);
}
@@ -320,17 +331,18 @@ public class FPTree {
private long recursivelyAddPrefixPats(FPTree pTree, FPTree qTree, FPNode node,
IntArrayList items) {
- long added = 0;
long count = node.count();
int attribute = node.attribute();
if (items == null) {
// at root
- if (!(node == root))
+ if (!(node == root)) {
throw new IllegalStateException();
+ }
items = new IntArrayList();
} else {
items.add(attribute);
}
+ long added = 0;
for (FPNode child : node.children()) {
added += recursivelyAddPrefixPats(pTree, qTree, child, items);
}
@@ -350,7 +362,7 @@ public class FPTree {
return added;
}
- private void toStringHelper(StringBuilder sb, FPNode currNode, String prefix) {
+ private static void toStringHelper(StringBuilder sb, FPNode currNode, String prefix) {
if (currNode.numChildren() == 0) {
sb.append(prefix).append("-{attr:").append(currNode.attribute())
.append(", cnt:").append(currNode.count()).append("}\n");
@@ -363,15 +375,17 @@ public class FPTree {
fakePre.append(' ');
}
int i = 0;
- for (FPNode child : currNode.children())
+ for (FPNode child : currNode.children()) {
toStringHelper(sb, child, (i++ == 0 ? newPre : fakePre).toString() + '-' + i + "->");
+ }
}
}
+ @Override
public String toString() {
StringBuilder sb = new StringBuilder("[FPTree\n");
toStringHelper(sb, root, " ");
- sb.append("]");
+ sb.append(']');
return sb.toString();
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java Wed Jun 20 12:07:50 2012
@@ -20,7 +20,6 @@ package org.apache.mahout.math.hadoop;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configurable;
@@ -192,9 +191,7 @@ public class DistributedRowMatrix implem
return out;
}
- public Vector columnMeans() throws IOException, InterruptedException, ClassNotFoundException,
- IllegalArgumentException, SecurityException, InstantiationException, IllegalAccessException,
- InvocationTargetException, NoSuchMethodException {
+ public Vector columnMeans() throws IOException {
return columnMeans("SequentialAccessSparseVector");
}
@@ -206,18 +203,13 @@ public class DistributedRowMatrix implem
* RandomAccessSparseVector, DenseVector
* @return Vector containing the column-wise mean of this
*/
- public Vector columnMeans(String vectorClass) throws IOException,
- InterruptedException, IllegalArgumentException, SecurityException,
- ClassNotFoundException, InstantiationException, IllegalAccessException,
- InvocationTargetException, NoSuchMethodException {
+ public Vector columnMeans(String vectorClass) throws IOException {
Path outputVectorTmpPath =
new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
Configuration initialConf =
getConf() == null ? new Configuration() : getConf();
String vectorClassFull = "org.apache.mahout.math." + vectorClass;
- Vector mean =
- MatrixColumnMeansJob.run(initialConf, rowPath, outputVectorTmpPath,
- vectorClassFull);
+ Vector mean = MatrixColumnMeansJob.run(initialConf, rowPath, outputVectorTmpPath, vectorClassFull);
if (!keepTempFiles) {
FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
fs.delete(outputVectorTmpPath, true);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java Wed Jun 20 12:07:50 2012
@@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.lib.i
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
@@ -44,15 +45,18 @@ import com.google.common.io.Closeables;
* DistributedRowMatrix. This job can be accessed using
* DistributedRowMatrix.columnMeans()
*/
-public class MatrixColumnMeansJob {
+public final class MatrixColumnMeansJob {
public static final String VECTOR_CLASS =
"DistributedRowMatrix.columnMeans.vector.class";
+ private MatrixColumnMeansJob() {
+ }
+
public static Vector run(Configuration conf,
Path inputPath,
Path outputVectorTmpPath) throws IOException {
- return run(conf, inputPath, outputVectorTmpPath);
+ return run(conf, inputPath, outputVectorTmpPath, DenseVector.class.getName()); // VECTOR_CLASS is a config key
}
/**
@@ -61,7 +65,7 @@ public class MatrixColumnMeansJob {
* @param initialConf
* @param inputPath
* path to DistributedRowMatrix input
- * @param tmpPath
+ * @param outputVectorTmpPath
* path for temporary files created during job
* @param vectorClass
* String of desired class for returned vector e.g. DenseVector,
@@ -116,10 +120,11 @@ public class MatrixColumnMeansJob {
Closeables.closeQuietly(iterator);
}
} catch (Throwable thr) {
- if (thr instanceof IOException)
+ if (thr instanceof IOException) {
throw (IOException) thr;
- else
+ } else {
throw new IOException(thr);
+ }
}
}
@@ -129,8 +134,8 @@ public class MatrixColumnMeansJob {
public static class MatrixColumnMeansMapper extends
Mapper<IntWritable, VectorWritable, NullWritable, VectorWritable> {
- private Vector runningSum = null;
- private String vectorClass = null;
+ private Vector runningSum;
+ private String vectorClass;
@Override
public void setup(Context context) {
@@ -147,17 +152,14 @@ public class MatrixColumnMeansJob {
public void map(IntWritable r, VectorWritable v, Context context)
throws IOException {
if (runningSum == null) {
- try {
/*
* If this is the first vector the mapper has seen, instantiate a new
* vector using the parameter VECTOR_CLASS
*/
- runningSum =
- (Vector) Class.forName(vectorClass).getConstructor(int.class)
- .newInstance(v.get().size() + 1);
- } catch (Exception e) {
- e.printStackTrace();
- }
+ runningSum = ClassUtils.instantiateAs(vectorClass,
+ Vector.class,
+ new Class<?>[] { int.class },
+ new Object[] { v.get().size() + 1 });
runningSum.set(0, 1);
runningSum.viewPart(1, v.get().size()).assign(v.get());
} else {
@@ -188,9 +190,10 @@ public class MatrixColumnMeansJob {
public static class MatrixColumnMeansReducer extends
Reducer<NullWritable, VectorWritable, IntWritable, VectorWritable> {
- private static final IntWritable one = new IntWritable(1);
- private String vectorClass = null;
- Vector outputVector = null;
+ private static final IntWritable ONE = new IntWritable(1);
+
+ private String vectorClass;
+ Vector outputVector;
VectorWritable outputVectorWritable = new VectorWritable();
@Override
@@ -201,8 +204,7 @@ public class MatrixColumnMeansJob {
@Override
public void reduce(NullWritable n,
Iterable<VectorWritable> vectors,
- Context context) throws IOException,
- InterruptedException {
+ Context context) throws IOException, InterruptedException {
/**
* Add together partial column-wise sums from mappers
@@ -223,16 +225,13 @@ public class MatrixColumnMeansJob {
outputVectorWritable.set(outputVector.viewPart(1,
outputVector.size() - 1)
.divide(outputVector.get(0)));
- context.write(one, outputVectorWritable);
+ context.write(ONE, outputVectorWritable);
} else {
- try {
- Vector emptyVector =
- (Vector) Class.forName(vectorClass).getConstructor(int.class)
- .newInstance(0);
- context.write(one, new VectorWritable(emptyVector));
- } catch (Exception e) {
- e.printStackTrace();
- }
+ Vector emptyVector = ClassUtils.instantiateAs(vectorClass,
+ Vector.class,
+ new Class<?>[] { int.class },
+ new Object[] { 0 });
+ context.write(ONE, new VectorWritable(emptyVector));
}
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java Wed Jun 20 12:07:50 2012
@@ -95,7 +95,6 @@ public class TransposeJob extends Abstra
FileInputFormat.addInputPath(conf, matrixInputPath);
conf.setInputFormat(SequenceFileInputFormat.class);
FileOutputFormat.setOutputPath(conf, matrixOutputPath);
- System.out.println("OUTPUT --> " + matrixOutputPath.toString());
conf.setMapperClass(TransposeMapper.class);
conf.setMapOutputKeyClass(IntWritable.class);
conf.setMapOutputValueClass(VectorWritable.class);
@@ -136,8 +135,10 @@ public class TransposeJob extends Abstra
implements Reducer<WritableComparable<?>, VectorWritable, WritableComparable<?>, VectorWritable> {
@Override
- public void reduce(WritableComparable<?> key, Iterator<VectorWritable> vectors,
- OutputCollector<WritableComparable<?>, VectorWritable> out, Reporter reporter) throws IOException {
+ public void reduce(WritableComparable<?> key,
+ Iterator<VectorWritable> vectors,
+ OutputCollector<WritableComparable<?>,VectorWritable> out,
+ Reporter reporter) throws IOException {
out.collect(key, VectorWritable.merge(vectors));
}
}
@@ -146,8 +147,10 @@ public class TransposeJob extends Abstra
implements Reducer<WritableComparable<?>, VectorWritable, WritableComparable<?>, VectorWritable> {
@Override
- public void reduce(WritableComparable<?> key, Iterator<VectorWritable> vectors,
- OutputCollector<WritableComparable<?>, VectorWritable> out, Reporter reporter) throws IOException {
+ public void reduce(WritableComparable<?> key,
+ Iterator<VectorWritable> vectors,
+ OutputCollector<WritableComparable<?>, VectorWritable> out,
+ Reporter reporter) throws IOException {
Vector merged = VectorWritable.merge(vectors).get();
out.collect(key, new VectorWritable(new SequentialAccessSparseVector(merged)));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Wed Jun 20 12:07:50 2012
@@ -57,7 +57,7 @@ public class DistributedLanczosSolver ex
* For the distributed case, the best guess at a useful initialization state for Lanczos we'll chose to be
* uniform over all input dimensions, L_2 normalized.
*/
- public Vector getInitialVector(VectorIterable corpus) {
+ public static Vector getInitialVector(VectorIterable corpus) {
Vector initialVector = new DenseVector(corpus.numCols());
initialVector.assign(1.0 / Math.sqrt(corpus.numCols()));
return initialVector;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Wed Jun 20 12:07:50 2012
@@ -30,7 +30,6 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.MatrixSlice;
-import org.apache.mahout.math.OrthonormalityVerifier;
import org.apache.mahout.math.SparseRowMatrix;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorIterable;
@@ -182,10 +181,6 @@ public class EigenVerificationJob extend
return parseArguments(args);
}
- private VectorIterable computePairwiseInnerProducts() {
- return OrthonormalityVerifier.pairwiseInnerProducts(eigensToVerify);
- }
-
private void saveCleanEigens(Configuration conf, Collection<Map.Entry<MatrixSlice, EigenStatus>> prunedEigenMeta)
throws IOException {
Path path = new Path(outPath, CLEAN_EIGENVECTORS);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java Wed Jun 20 12:07:50 2012
@@ -84,10 +84,8 @@ public class HdfsBackedLanczosState exte
}
private void createDirIfNotExist(Path path) throws IOException {
- if (!fs.exists(path)) {
- if (!fs.mkdirs(path)) {
- throw new IOException("Unable to create: " + path);
- }
+ if (!fs.exists(path) && !fs.mkdirs(path)) {
+ throw new IOException("Unable to create: " + path);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Wed Jun 20 12:07:50 2012
@@ -201,7 +201,7 @@ public class RowSimilarityJob extends Ab
@Override
protected void map(IntWritable row, VectorWritable vectorWritable, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Vector rowVector = similarity.normalize(vectorWritable.get());
@@ -243,7 +243,7 @@ public class RowSimilarityJob extends Ab
public static class MergeVectorsCombiner extends Reducer<IntWritable,VectorWritable,IntWritable,VectorWritable> {
@Override
protected void reduce(IntWritable row, Iterable<VectorWritable> partialVectors, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
ctx.write(row, new VectorWritable(Vectors.merge(partialVectors)));
}
}
@@ -263,7 +263,7 @@ public class RowSimilarityJob extends Ab
@Override
protected void reduce(IntWritable row, Iterable<VectorWritable> partialVectors, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Vector partialVector = Vectors.merge(partialVectors);
if (row.get() == NORM_VECTOR_MARKER) {
@@ -316,7 +316,7 @@ public class RowSimilarityJob extends Ab
@Override
protected void map(IntWritable column, VectorWritable occurrenceVector, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Vector.Element[] occurrences = Vectors.toArray(occurrenceVector);
Arrays.sort(occurrences, BY_INDEX);
@@ -342,8 +342,7 @@ public class RowSimilarityJob extends Ab
}
- public static class SimilarityReducer
- extends Reducer<IntWritable,VectorWritable,IntWritable,VectorWritable> {
+ public static class SimilarityReducer extends Reducer<IntWritable,VectorWritable,IntWritable,VectorWritable> {
private VectorSimilarityMeasure similarity;
private int numberOfColumns;
@@ -364,7 +363,7 @@ public class RowSimilarityJob extends Ab
@Override
protected void reduce(IntWritable row, Iterable<VectorWritable> partialDots, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Iterator<VectorWritable> partialDotsIterator = partialDots.iterator();
Vector dots = partialDotsIterator.next().get();
while (partialDotsIterator.hasNext()) {
@@ -405,7 +404,7 @@ public class RowSimilarityJob extends Ab
@Override
protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Vector similarities = similaritiesWritable.get();
// For performance reasons moved transposedPartial creation out of the while loop and reusing the same vector
Vector transposedPartial = similarities.like();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java Wed Jun 20 12:07:50 2012
@@ -37,9 +37,10 @@ import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;
-public class Vectors {
+public final class Vectors {
- private Vectors() {}
+ private Vectors() {
+ }
public static Vector maybeSample(Vector original, int sampleSize) {
if (original.getNumNondefaultElements() <= sampleSize) {
@@ -165,7 +166,8 @@ public class Vectors {
/* ugly optimization for loading sparse vectors containing ints only */
public static OpenIntIntHashMap readAsIntMap(DataInput in) throws IOException {
int flags = in.readByte();
- Preconditions.checkArgument(flags >> VectorWritable.NUM_FLAGS == 0, "Unknown flags set: %d", Integer.toString(flags, 2));
+ Preconditions.checkArgument(flags >> VectorWritable.NUM_FLAGS == 0,
+ "Unknown flags set: %d", Integer.toString(flags, 2));
boolean dense = (flags & VectorWritable.FLAG_DENSE) != 0;
boolean sequential = (flags & VectorWritable.FLAG_SEQUENTIAL) != 0;
boolean laxPrecision = (flags & VectorWritable.FLAG_LAX_PRECISION) != 0;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java Wed Jun 20 12:07:50 2012
@@ -23,9 +23,11 @@ public class LoglikelihoodSimilarity ext
@Override
public double similarity(double summedAggregations, double normA, double normB, int numberOfColumns) {
- double logLikelihood = LogLikelihood.logLikelihoodRatio((long) summedAggregations, (long) (normB - summedAggregations),
- (long) (normA - summedAggregations), (long) (numberOfColumns - normA - normB + summedAggregations));
-
+ double logLikelihood =
+ LogLikelihood.logLikelihoodRatio((long) summedAggregations,
+ (long) (normB - summedAggregations),
+ (long) (normA - summedAggregations),
+ (long) (numberOfColumns - normA - normB + summedAggregations));
return 1.0 - 1.0 / (1.0 + logLikelihood);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java Wed Jun 20 12:07:50 2012
@@ -55,9 +55,8 @@ public class DistributedConjugateGradien
* @param b Vector b
* @param preconditioner Optional preconditioner for the system
* @param maxIterations Maximum number of iterations to run, defaults to numCols
- * @param maxError Maximum error tolerated in the result. If the norm of the residual falls below this, then the
- * algorithm stops and returns.
-
+ * @param maxError Maximum error tolerated in the result. If the norm of the residual falls below this,
+ * then the algorithm stops and returns.
* @return The vector that solves the system.
*/
public Vector runJob(Path inputPath,
@@ -92,7 +91,9 @@ public class DistributedConjugateGradien
Path vectorPath = new Path(AbstractJob.getOption(parsedArgs, "--vector"));
int numRows = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numRows"));
int numCols = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numCols"));
- int maxIterations = parsedArgs.containsKey("--maxIter") ? Integer.parseInt(AbstractJob.getOption(parsedArgs, "--maxIter")) : numCols;
+ int maxIterations = parsedArgs.containsKey("--maxIter")
+ ? Integer.parseInt(AbstractJob.getOption(parsedArgs, "--maxIter"))
+ : numCols;
double maxError = parsedArgs.containsKey("--maxError")
? Double.parseDouble(AbstractJob.getOption(parsedArgs, "--maxError"))
: ConjugateGradientSolver.DEFAULT_MAX_ERROR;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java Wed Jun 20 12:07:50 2012
@@ -23,9 +23,9 @@ package org.apache.mahout.math.hadoop.st
*/
public final class VarianceTotals {
- private double sumOfSquares = 0.0;
- private double sum = 0.0;
- private double totalCount = 0.0;
+ private double sumOfSquares;
+ private double sum;
+ private double totalCount;
public double getSumOfSquares() {
return sumOfSquares;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java Wed Jun 20 12:07:50 2012
@@ -62,7 +62,7 @@ import org.apache.mahout.math.hadoop.sto
*
*/
@SuppressWarnings("deprecation")
-public class ABtJob {
+public final class ABtJob {
public static final String PROP_BT_PATH = "ssvd.Bt.path";
public static final String PROP_BT_BROADCAST = "ssvd.Bt.broadcast";
@@ -220,16 +220,16 @@ public class ABtJob {
// DEBUG: stdout
//System.out.printf("list of files: " + btFiles);
- String btLocalPath = "";
+ StringBuilder btLocalPath = new StringBuilder();
for (Path btFile : btFiles) {
- if (!btLocalPath.isEmpty()) {
- btLocalPath += Path.SEPARATOR_CHAR;
+ if (btLocalPath.length() > 0) {
+ btLocalPath.append(Path.SEPARATOR_CHAR);
}
- btLocalPath += btFile;
+ btLocalPath.append(btFile);
}
btInput =
- new SequenceFileDirIterator<IntWritable, VectorWritable>(new Path(btLocalPath),
+ new SequenceFileDirIterator<IntWritable, VectorWritable>(new Path(btLocalPath.toString()),
PathType.LIST,
null,
null,
@@ -379,16 +379,12 @@ public class ABtJob {
/**
* key doesn't matter here, only value does. key always gets substituted by
* SPW.
- *
- * @param <K>
- * bogus
*/
- private <K, V> OutputCollector<K, V>
- createOutputCollector(String name,
- final SplitPartitionedWritable spw,
- Context ctx,
- Class<V> valueClass) throws IOException,
- InterruptedException {
+ private <K,V> OutputCollector<K,V> createOutputCollector(String name,
+ final SplitPartitionedWritable spw,
+ Context ctx,
+ Class<V> valueClass)
+ throws IOException, InterruptedException {
Path outputPath = getSplitFilePath(name, spw, ctx);
final SequenceFile.Writer w =
SequenceFile.createWriter(FileSystem.get(outputPath.toUri(), ctx.getConfiguration()),
@@ -406,9 +402,7 @@ public class ABtJob {
}
@Override
- protected void cleanup(Context context) throws IOException,
- InterruptedException {
-
+ protected void cleanup(Context context) throws IOException, InterruptedException {
IOUtils.close(closeables);
}