You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/01/09 13:53:26 UTC
svn commit: r897440 - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/
main/java/org/apache/mahout/classifier/
main/java/org/apache/mahout/classifier/bayes/datastore/
main/java/org/apache/mahout/clustering/kmea...
Author: srowen
Date: Sat Jan 9 12:53:25 2010
New Revision: 897440
URL: http://svn.apache.org/viewvc?rev=897440&view=rev
Log:
Code style adjustments; enabled/fixed TestSamplingIterator
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/Bigram.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemBigramGenerator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemSimilarityEstimator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemJoiner.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemRecommender.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/MultiTransactionTreeIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/Integers.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialSequentialBuilder.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/Bigram.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/Bigram.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/Bigram.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/Bigram.java Sat Jan 9 12:53:25 2010
@@ -90,14 +90,15 @@
@Override
public int compareTo(Bigram o) {
- if (o == null) {
- return 1;
- }
- int ret = first - o.first;
- if (ret == 0) {
- ret = second - o.second;
+ if (first == o.first) {
+ if (second == o.second) {
+ return 0;
+ } else {
+ return second < o.second ? -1 : 1;
+ }
+ } else {
+ return first < o.first ? -1 : 1;
}
- return ret;
}
@Override
@@ -116,7 +117,7 @@
@Override
public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) {
- int ret = -1;
+ int ret;
try {
int firstb1 = WritableComparator.readVInt(b1, s1);
int firstb2 = WritableComparator.readVInt(b2, s2);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemBigramGenerator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemBigramGenerator.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemBigramGenerator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemBigramGenerator.java Sat Jan 9 12:53:25 2010
@@ -32,7 +32,6 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
@@ -46,6 +45,8 @@
public final class ItemBigramGenerator extends Configured implements Tool {
+ private static final Logger log = LoggerFactory.getLogger(ItemBigramGenerator.class);
+
public static class UserItemMapper extends MapReduceBase
implements Mapper<LongWritable, Text, VIntWritable, VIntWritable> {
@@ -162,7 +163,7 @@
public int run(String[] args) throws IOException {
// TODO use Commons CLI 2
if (args.length < 2) {
- System.out.println("Usage: ItemBigramGemerator <input-dir> <output-dir> [reducers]");
+ log.error("Usage: ItemBigramGemerator <input-dir> <output-dir> [reducers]");
ToolRunner.printGenericCommandUsage(System.out);
return -1;
}
@@ -171,7 +172,7 @@
Path outputPath = new Path(args[1]);
int reducers = args.length > 2 ? Integer.parseInt(args[2]) : 1;
JobConf jobConf = prepareJob(inputPaths, outputPath, reducers);
- RunningJob job = JobClient.runJob(jobConf);
+ JobClient.runJob(jobConf);
return 0;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemSimilarityEstimator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemSimilarityEstimator.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemSimilarityEstimator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/ItemSimilarityEstimator.java Sat Jan 9 12:53:25 2010
@@ -34,15 +34,17 @@
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Iterator;
import java.util.PriorityQueue;
+import java.util.Queue;
/**
@@ -52,6 +54,8 @@
*/
public final class ItemSimilarityEstimator extends Configured implements Tool {
+ private static final Logger log = LoggerFactory.getLogger(ItemSimilarityEstimator.class);
+
/** Partition based on the first part of the bigram. */
public static class FirstPartitioner implements Partitioner<Bigram, Writable> {
@@ -106,7 +110,7 @@
/** All sorted bigrams for item1 are recieved in reduce. <p/> K -> (item1, item2), V -> (FREQ) */
public static class ItemItemReducer extends MapReduceBase implements Reducer<Bigram, Bigram, Bigram, DoubleWritable> {
- private PriorityQueue<Bigram.Frequency> freqBigrams = new PriorityQueue<Bigram.Frequency>();
+ private final Queue<Bigram.Frequency> freqBigrams = new PriorityQueue<Bigram.Frequency>();
private Bigram key = new Bigram();
private DoubleWritable value = new DoubleWritable();
@@ -165,7 +169,7 @@
}
- public JobConf prepareJob(String inputPaths, Path outputPath, int maxFreqItems, int reducers) throws IOException {
+ public JobConf prepareJob(String inputPaths, Path outputPath, int maxFreqItems, int reducers) {
JobConf job = new JobConf(getConf());
job.setJobName("Item Bigram Counter");
job.setJarByClass(this.getClass());
@@ -200,9 +204,7 @@
public int run(String[] args) throws IOException {
// TODO use Commons CLI 2
if (args.length < 2) {
- System.out
- .println("ItemSimilarityEstimator <input-dirs> <output-dir> "
- + "[max-frequent-items] [reducers]");
+ log.error("ItemSimilarityEstimator <input-dirs> <output-dir> [max-frequent-items] [reducers]");
ToolRunner.printGenericCommandUsage(System.out);
return -1;
}
@@ -212,7 +214,7 @@
int maxFreqItems = args.length > 2 ? Integer.parseInt(args[2]) : 20;
int reducers = args.length > 3 ? Integer.parseInt(args[3]) : 1;
JobConf jobConf = prepareJob(inputPaths, outputPath, maxFreqItems, reducers);
- RunningJob job = JobClient.runJob(jobConf);
+ JobClient.runJob(jobConf);
return 0;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemJoiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemJoiner.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemJoiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemJoiner.java Sat Jan 9 12:53:25 2010
@@ -32,7 +32,6 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
@@ -49,6 +48,8 @@
public final class UserItemJoiner extends Configured implements Tool {
+ private static final Logger log = LoggerFactory.getLogger(UserItemJoiner.class);
+
public static class JoinUserMapper extends MapReduceBase
implements Mapper<LongWritable, Text, Bigram, TupleWritable> {
@@ -144,7 +145,7 @@
}
- public JobConf prepareJob(Path userInputPath, Path itemInputPath, Path outputPath, int reducers) throws IOException {
+ public JobConf prepareJob(Path userInputPath, Path itemInputPath, Path outputPath, int reducers) {
JobConf job = new JobConf(getConf());
job.setJobName("User Item Joiner");
job.setJarByClass(this.getClass());
@@ -178,9 +179,7 @@
public int run(String[] args) throws IOException {
// TODO use Commons CLI 2
if (args.length < 3) {
- System.out
- .println("UserItemJoiner <user-input-dirs> <item-input-dir> <output-dir> "
- + "[reducers]");
+ log.error("UserItemJoiner <user-input-dirs> <item-input-dir> <output-dir> [reducers]");
ToolRunner.printGenericCommandUsage(System.out);
return -1;
}
@@ -190,7 +189,7 @@
Path outputPath = new Path(args[2]);
int reducers = args.length > 3 ? Integer.parseInt(args[3]) : 1;
JobConf jobConf = prepareJob(userInputPath, itemInputPath, outputPath, reducers);
- RunningJob job = JobClient.runJob(jobConf);
+ JobClient.runJob(jobConf);
return 0;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemRecommender.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/UserItemRecommender.java Sat Jan 9 12:53:25 2010
@@ -29,11 +29,12 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.Serializable;
@@ -49,6 +50,8 @@
public final class UserItemRecommender extends Configured implements Tool {
+ private static final Logger log = LoggerFactory.getLogger(UserItemRecommender.class);
+
public static class RecommenderMapper extends MapReduceBase
implements Mapper<VIntWritable, TupleWritable, Bigram, TupleWritable> {
@@ -142,8 +145,7 @@
}
}
- public JobConf prepareJob(String inputPaths, Path outputPath, int maxRecommendations, int reducers)
- throws IOException {
+ public JobConf prepareJob(String inputPaths, Path outputPath, int maxRecommendations, int reducers) {
JobConf job = new JobConf(getConf());
job.setJobName("User Item Recommendations");
job.setJarByClass(this.getClass());
@@ -172,9 +174,7 @@
public int run(String[] args) throws IOException {
// TODO use Commons CLI 2
if (args.length < 2) {
- System.out
- .println("UserItemRecommender <input-dirs> <output-dir> "
- + "[max-recommendations] [reducers]");
+ log.error("UserItemRecommender <input-dirs> <output-dir> [max-recommendations] [reducers]");
ToolRunner.printGenericCommandUsage(System.out);
return -1;
}
@@ -184,7 +184,7 @@
int maxRecommendations = args.length > 2 ? Integer.parseInt(args[2]) : 100;
int reducers = args.length > 3 ? Integer.parseInt(args[3]) : 1;
JobConf jobConf = prepareJob(inputPaths, outputPath, maxRecommendations, reducers);
- RunningJob job = JobClient.runJob(jobConf);
+ JobClient.runJob(jobConf);
return 0;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Sat Jan 9 12:53:25 2010
@@ -36,7 +36,7 @@
private final Map<String,Integer> labelMap = new HashMap<String,Integer>();
- private int[][] confusionMatrix;
+ private final int[][] confusionMatrix;
private String defaultLabel = "unknown";
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Sat Jan 9 12:53:25 2010
@@ -30,7 +30,7 @@
*/
public class ResultAnalyzer implements Summarizable {
- private ConfusionMatrix confusionMatrix;
+ private final ConfusionMatrix confusionMatrix;
/*
* === Summary ===
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java Sat Jan 9 12:53:25 2010
@@ -52,11 +52,11 @@
private HTable table;
- private Cache<String,Result> tableCache;
+ private final Cache<String,Result> tableCache;
private final String hbaseTable;
- private Parameters parameters;
+ private final Parameters parameters;
private double thetaNormalizer = 1.0;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java Sat Jan 9 12:53:25 2010
@@ -44,7 +44,7 @@
private final Map<String,Map<String,Double>> vectors = new HashMap<String,Map<String,Double>>();
- private Parameters params;
+ private final Parameters params;
private double thetaNormalizer = 1.0;
@@ -110,8 +110,11 @@
index)
/ thetaNormalizer;
else if (vectorName.equals("params")) {
- if (index.equals("alpha_i")) return alphaI;
- else throw new InvalidDatastoreException();
+ if (index.equals("alpha_i")) {
+ return alphaI;
+ } else {
+ throw new InvalidDatastoreException();
+ }
}
return vectorGetCell(vectorName, index);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java Sat Jan 9 12:53:25 2010
@@ -41,9 +41,4 @@
output.collect(key, new KMeansInfo(cluster.getNumPoints(), cluster.getPointTotal()));
}
- @Override
- public void configure(JobConf job) {
- super.configure(job);
- }
-
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java Sat Jan 9 12:53:25 2010
@@ -23,6 +23,8 @@
import java.util.regex.Pattern;
public class StringRecordIterator implements Iterator<Pair<List<String>,Long>> {
+
+ private static final Long ONE = 1L;
private final Iterator<String> lineIterator;
private Pattern splitter = null;
@@ -41,7 +43,7 @@
public Pair<List<String>,Long> next() {
String line = lineIterator.next();
String[] items = splitter.split(line);
- return new Pair<List<String>,Long>(Arrays.asList(items), Long.valueOf(1));
+ return new Pair<List<String>,Long>(Arrays.asList(items), ONE);
}
@Override
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java Sat Jan 9 12:53:25 2010
@@ -55,12 +55,14 @@
}
public double dot(Vector a, Vector b) {
+ boolean sameVector = a == b;
Iterator<Vector.Element> it = a.iterateNonZero();
Vector.Element el;
Vector weights = getWeights();
double dot = 0;
while (it.hasNext() && (el = it.next()) != null) {
- double value = el.get() * (a == b ? el.get() : b.getQuick(el.index()));
+ double elementValue = el.get();
+ double value = elementValue * (sameVector ? elementValue : b.getQuick(el.index()));
if (weights != null) {
value *= weights.getQuick(el.index());
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java Sat Jan 9 12:53:25 2010
@@ -49,7 +49,7 @@
* preparation step of the partial mapreduce builder. Computes some stats that
* will be used by the builder.
*/
-public class Step0Job implements Cloneable {
+public class Step0Job {
/** directory that will hold this job's output */
private final Path outputPath;
@@ -245,7 +245,7 @@
*
*/
public static class Step0Output implements Writable,
- Comparable<Step0Output> {
+ Comparable<Step0Output>, Cloneable {
/**
* first key of the partition<br>
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java Sat Jan 9 12:53:25 2010
@@ -238,7 +238,7 @@
*
*/
protected static class Frequencies implements Writable,
- Comparable<Frequencies> {
+ Comparable<Frequencies>, Cloneable {
/**
* first key of the partition<br>
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/MultiTransactionTreeIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/MultiTransactionTreeIterator.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/MultiTransactionTreeIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/MultiTransactionTreeIterator.java Sat Jan 9 12:53:25 2010
@@ -51,7 +51,7 @@
public List<Integer> next() {
List<Integer> returnable = currentPattern.getFirst();
currentCount++;
- if (currentCount == currentPattern.getSecond().longValue()) {
+ if (currentCount == currentPattern.getSecond()) {
if (pIterator.hasNext()) {
currentPattern = pIterator.next();
currentCount = 0;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java Sat Jan 9 12:53:25 2010
@@ -42,7 +42,7 @@
public final class TransactionTreeIterator implements Iterator<Pair<List<Integer>, Long>> {
- Stack<int[]> depth = new Stack<int[]>();
+ private final Stack<int[]> depth = new Stack<int[]>();
public TransactionTreeIterator() {
depth.push(new int[] {0, -1});
@@ -50,17 +50,14 @@
@Override
public boolean hasNext() {
- if (depth.isEmpty()) {
- return false;
- }
- return true;
+ return !depth.isEmpty();
}
@Override
public Pair<List<Integer>, Long> next() {
- long sum = 0;
- int childId = 0;
+ long sum;
+ int childId;
do {
int[] top = depth.peek();
while (top[1] + 1 == childCount[top[0]]) {
@@ -177,10 +174,10 @@
}
public int addPattern(List<Integer> myList, long addCount) {
- int temp = TransactionTree.ROOTNODEID;
+ int temp = ROOTNODEID;
int ret = 0;
boolean addCountMode = true;
- int child = -1;
+ int child;
for (int attributeValue : myList) {
if (addCountMode) {
@@ -369,7 +366,7 @@
for (int i = 0, j = transactionSet.size(); i < j; i++) {
Pair<List<Integer>, Long> transaction = transactionSet.get(i);
- vLong.set(transaction.getSecond().longValue());
+ vLong.set(transaction.getSecond());
vLong.write(out);
vInt.set(transaction.getFirst().size());
@@ -422,8 +419,7 @@
if (nodeChildren[nodes] == null) {
nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
}
- int childNodeId = nodes++;
- return childNodeId;
+ return nodes++;
}
private void resize() {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java Sat Jan 9 12:53:25 2010
@@ -46,7 +46,7 @@
public void collect(Integer key, FrequentPatternMaxHeap value) throws IOException {
List<Pair<List<A>, Long>> perAttributePatterns = new ArrayList<Pair<List<A>, Long>>();
PriorityQueue<Pattern> t = value.getHeap();
- while (t.size() > 0) {
+ while (!t.isEmpty()) {
Pattern itemSet = t.poll();
List<A> frequentPattern = new ArrayList<A>();
for (int j = 0; j < itemSet.length(); j++) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Sat Jan 9 12:53:25 2010
@@ -81,7 +81,7 @@
Pair<List<A>, Long> transaction = transactions.next();
for (A attribute : transaction.getFirst()) {
if (attributeSupport.containsKey(attribute) == false) {
- attributeSupport.put(attribute, new MutableLong(transaction.getSecond().longValue()));
+ attributeSupport.put(attribute, new MutableLong(transaction.getSecond()));
} else {
attributeSupport.get(attribute).add(transaction.getSecond().longValue());
// count++;
@@ -212,7 +212,7 @@
return patterns;
}
- private FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree, int k,
+ private static FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree, int k,
MutableLong minSupportMutable) {
FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k, false);
@@ -268,7 +268,7 @@
Pair<int[], Long> transaction = transactions.next();
Arrays.sort(transaction.getFirst());
// attribcount += transaction.length;
- nodecount += treeAddCount(tree, transaction.getFirst(), transaction.getSecond().longValue(),
+ nodecount += treeAddCount(tree, transaction.getFirst(), transaction.getSecond(),
minSupportMutable, attributeFrequency);
i++;
if (i % 10000 == 0) {
@@ -355,7 +355,6 @@
return generateSinglePathPatterns(tree, k, minSupportMutable);
}
- FrequentPatternMaxHeap returnedPatterns;
updater.update("Bottom Up FP Growth");
for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) {
int attribute = tree.getAttributeAtIndex(i);
@@ -365,6 +364,7 @@
}
FPTree conditionalTree = treeCache.getTree(level);
+ FrequentPatternMaxHeap returnedPatterns;
if (conditionalOfCurrentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), minSupportMutable,
conditionalTree, tree);
@@ -424,8 +424,6 @@
updater.update("Top Down Growth:");
- FrequentPatternMaxHeap returnedPatterns;
-
for (int i = 0; i < tree.getHeaderTableCount(); i++) {
int attribute = tree.getAttributeAtIndex(i);
long count = tree.getHeaderSupportCount(attribute);
@@ -435,6 +433,7 @@
FPTree conditionalTree = treeCache.getTree(level);
+ FrequentPatternMaxHeap returnedPatterns;
if (conditionalOfCurrentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), minSupportMutable,
conditionalTree, tree);
@@ -470,7 +469,7 @@
return frequentPatterns;
}
- private FrequentPatternMaxHeap mergeHeap(FrequentPatternMaxHeap frequentPatterns,
+ private static FrequentPatternMaxHeap mergeHeap(FrequentPatternMaxHeap frequentPatterns,
FrequentPatternMaxHeap returnedPatterns, int attribute, long count, boolean addAttribute,
boolean subPatternCheck) {
frequentPatterns.addAll(returnedPatterns, attribute, count);
@@ -541,7 +540,7 @@
}
- private void pruneFPTree(MutableLong minSupportMutable, FPTree tree) {
+ private static void pruneFPTree(MutableLong minSupportMutable, FPTree tree) {
for (int i = 0; i < tree.getHeaderTableCount(); i++) {
int currentAttribute = tree.getAttributeAtIndex(i);
if (tree.getHeaderSupportCount(currentAttribute) < minSupportMutable.intValue()) {
@@ -606,7 +605,7 @@
* @param attributeFrequency the list of attributes and their frequency
* @return the number of new nodes added
*/
- private int treeAddCount(FPTree tree, int[] myList, long addCount, MutableLong minSupport,
+ private static int treeAddCount(FPTree tree, int[] myList, long addCount, MutableLong minSupport,
long[] attributeFrequency) {
int temp = FPTree.ROOTNODEID;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java Sat Jan 9 12:53:25 2010
@@ -67,7 +67,7 @@
private boolean singlePath;
- private Set<Integer> sortedSet = new TreeSet<Integer>();
+ private final Set<Integer> sortedSet = new TreeSet<Integer>();
public FPTree() {
this(DEFAULT_INITIAL_SIZE, DEFAULT_HEADER_TABLE_INITIAL_SIZE);
@@ -201,8 +201,7 @@
nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
}
- int childNodeId = nodes++;
- return childNodeId;
+ return nodes++;
}
public final int createNode(int parentNodeId, int attributeValue, long count) {
@@ -236,8 +235,7 @@
if (nodeChildren[nodes] == null) {
nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
}
- int childNodeId = nodes++;
- return childNodeId;
+ return nodes++;
}
public final int getAttributeAtIndex(int index) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java Sat Jan 9 12:53:25 2010
@@ -21,7 +21,6 @@
import java.util.ArrayList;
-import org.apache.mahout.common.cache.LeastKCache;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java Sat Jan 9 12:53:25 2010
@@ -44,7 +44,7 @@
this.subPatternCheck = subPatternCheck;
patternIndex = new HashMap<Long, Set<Pattern>>();
for (Pattern p : queue) {
- Long index = Long.valueOf(p.support());
+ Long index = p.support();
Set<Pattern> patternList;
if (patternIndex.containsKey(index) == false) {
patternList = new HashSet<Pattern>();
@@ -133,11 +133,6 @@
return least.support();
}
- @Override
- public String toString() {
- return super.toString();
- }
-
private boolean addPattern(Pattern frequentPattern) {
if (subPatternCheck == false) {
queue.add(frequentPattern);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Sat Jan 9 12:53:25 2010
@@ -40,11 +40,11 @@
public class TestMeanShift extends TestCase {
- Vector[] raw = null;
+ private Vector[] raw = null;
- static FileSystem fs;
+ private FileSystem fs;
- static Configuration conf;
+ private Configuration conf;
// DistanceMeasure manhattanDistanceMeasure = new ManhattanDistanceMeasure();
@@ -97,7 +97,7 @@
}
}
- private static void writePointsToFile(Vector[] points, String fileName)
+ private void writePointsToFile(Vector[] points, String fileName)
throws IOException {
Path path = new Path(fileName);
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/Integers.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/Integers.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/Integers.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/Integers.java Sat Jan 9 12:53:25 2010
@@ -28,7 +28,7 @@
private Integers() {
}
- public static Iterator<Integer> iterator(final int n) {
+ public static Iterator<Integer> iterator(int n) {
return new IntegerIterator(n);
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java Sat Jan 9 12:53:25 2010
@@ -23,7 +23,7 @@
import org.apache.mahout.common.RandomUtils;
-public abstract class TestSamplingIterator extends TestCase {
+public class TestSamplingIterator extends TestCase {
@Override
protected void setUp() throws Exception {
@@ -70,7 +70,7 @@
}
double sd = Math.sqrt(0.9 * 0.1 * 1000);
assertTrue(k >= 100 - 3 * sd);
- assertTrue(k >= 100 + 3 * sd);
+ assertTrue(k <= 100 + 3 * sd);
}
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialSequentialBuilder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialSequentialBuilder.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialSequentialBuilder.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartialSequentialBuilder.java Sat Jan 9 12:53:25 2010
@@ -54,17 +54,17 @@
private static final Logger log = LoggerFactory.getLogger(PartialSequentialBuilder.class);
- protected PartialOutputCollector firstOutput;
+ private PartialOutputCollector firstOutput;
- protected PartialOutputCollector secondOutput;
+ private PartialOutputCollector secondOutput;
- protected final Dataset dataset;
+ private final Dataset dataset;
/** first instance id in hadoop's order */
- protected int[] firstIds;
+ private int[] firstIds;
/** partitions' sizes in hadoop order */
- protected int[] sizes;
+ private int[] sizes;
public PartialSequentialBuilder(TreeBuilder treeBuilder, Path dataPath,
Dataset dataset, long seed, Configuration conf) {
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeTest.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeTest.java Sat Jan 9 12:53:25 2010
@@ -25,6 +25,7 @@
import junit.framework.TestCase;
import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.RandomUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,13 +48,13 @@
private static final int SKIP_RATE = 10;
- private Random gen = new Random();
+ private final Random gen = RandomUtils.getRandom();
public List<Integer> generateRandomArray() {
List<Integer> list = new ArrayList<Integer>();
for (int i = 0; i < MAX_FEATURES; i++) {
if (gen.nextInt() % SKIP_RATE == 0) {
- list.add(Integer.valueOf(i));
+ list.add(i);
}
}
return list;
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java?rev=897440&r1=897439&r2=897440&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java Sat Jan 9 12:53:25 2010
@@ -18,16 +18,17 @@
package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
import java.util.Arrays;
+import java.util.Collection;
import java.util.HashSet;
import java.util.Random;
-import java.util.Set;
import junit.framework.TestCase;
+import org.apache.mahout.common.RandomUtils;
public class FrequentPatternMaxHeapTest extends TestCase {
public void testMapHeap() {
- Random gen = new Random(123L);
+ Random gen = RandomUtils.getRandom();
FrequentPatternMaxHeap pq = new FrequentPatternMaxHeap(50, true);
for (int i = 0; i < 20; i++) {
@@ -42,10 +43,10 @@
}
}
- public Pattern generateRandomPattern(Random gen) {
+ public static Pattern generateRandomPattern(Random gen) {
int length = 1 + Math.abs(gen.nextInt() % 6);
Pattern p = new Pattern();
- Set<Integer> set = new HashSet<Integer>();
+ Collection<Integer> set = new HashSet<Integer>();
for (int i = 0; i < length; i++) {
int id = Math.abs(gen.nextInt() % 20);
while (set.contains(id)) {