You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/10/20 16:04:58 UTC
svn commit: r827468 [1/2] - in /lucene/mahout/trunk: ./ core/
core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/
core/src/main/java/org/apache/mahout/classifier/
core/src/main/java/org/apache/mahout/classifier/bayes/datastore/
core/src/mai...
Author: srowen
Date: Tue Oct 20 14:04:55 2009
New Revision: 827468
URL: http://svn.apache.org/viewvc?rev=827468&view=rev
Log:
Various code convention updates before 0.2
Modified:
lucene/mahout/trunk/core/pom.xml
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonDirichletStateAdapter.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/TreeID.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ContextWriteOutputCollector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/SequenceFileOutputCollector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerStringOutputConvertor.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerTupleIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringOutputConvertor.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringTupleIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/TopKStringPatterns.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package.html
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/MatrixView.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseMatrix.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/JsonModelHolderAdapter.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartitionBugTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialSequentialBuilder.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartitionBugTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/ga/watchmaker/utils/DummyEvaluator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/meanshift/DisplayMeanShift.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/InputMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapred/BuildForest.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/BuildForest.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthJob.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDFitnessEvaluator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDMutation.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataSet.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/FileInfoParser.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/Descriptors.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/StrategyPanel.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDMutationTest.java
lucene/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/CDRuleTest.java
lucene/mahout/trunk/pom.xml
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterable.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
Modified: lucene/mahout/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/pom.xml?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/pom.xml (original)
+++ lucene/mahout/trunk/core/pom.xml Tue Oct 20 14:04:55 2009
@@ -1,5 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -39,10 +40,13 @@
</configuration>
<executions>
<execution>
- <id>make-assembly</id> <!-- this is used for inheritance merges -->
- <phase>package</phase> <!-- append to the packaging phase. -->
+ <id>make-assembly</id>
+ <!-- this is used for inheritance merges -->
+ <phase>package</phase>
+ <!-- append to the packaging phase. -->
<goals>
- <goal>single</goal> <!-- goals == mojos -->
+ <goal>single</goal>
+ <!-- goals == mojos -->
</goals>
</execution>
</executions>
@@ -84,7 +88,7 @@
<createChecksum>true</createChecksum>
</configuration>
</execution>
- <execution>
+ <execution>
<id>create-hbase-artifact</id>
<phase>validate</phase>
<goals>
@@ -372,10 +376,10 @@
<configuration>
<tasks>
<ant antfile="../maven/build.xml" target="core-job">
- <property name="dest" value="${project.build.directory}" />
- <property name="fullnamever" value="${project.artifactId}-${project.version}" />
- <property name="core-lib" value="lib" />
- <property name="shared-lib" value="../lib" />
+ <property name="dest" value="${project.build.directory}"/>
+ <property name="fullnamever" value="${project.artifactId}-${project.version}"/>
+ <property name="core-lib" value="lib"/>
+ <property name="shared-lib" value="../lib"/>
</ant>
</tasks>
@@ -419,16 +423,16 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-remote-resources-plugin</artifactId>
- <configuration>
- <appendedResourcesDirectory>../src/main/appended-resources</appendedResourcesDirectory>
+ <configuration>
+ <appendedResourcesDirectory>../src/main/appended-resources</appendedResourcesDirectory>
<resourceBundles>
<resourceBundle>org.apache:apache-jar-resource-bundle:1.4</resourceBundle>
</resourceBundles>
- <supplementalModels>
+ <supplementalModels>
<supplementalModel>supplemental-models.xml</supplementalModel>
</supplementalModels>
</configuration>
- </plugin>
+ </plugin>
</plugins>
</build>
@@ -471,8 +475,6 @@
<version>1.1.1</version>
</dependency>
-
-
<dependency>
<groupId>commons-httpclient</groupId>
@@ -579,14 +581,14 @@
<version>3.8.2</version>
<scope>test</scope>
</dependency>
-
+
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymockclassextension</artifactId>
<version>2.2</version>
<scope>test</scope>
</dependency>
-
+
<!-- Gson: Java to Json conversion -->
<dependency>
<groupId>com.google.code.gson</groupId>
@@ -594,7 +596,7 @@
<version>1.3</version>
<scope>compile</scope>
</dependency>
-
+
</dependencies>
<repositories>
<repository>
@@ -606,7 +608,7 @@
<releases>
<enabled>false</enabled>
</releases>
- </repository>
+ </repository>
<repository>
<id>gson</id>
<url>http://google-gson.googlecode.com/svn/mavenrepo</url>
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java Tue Oct 20 14:04:55 2009
@@ -201,20 +201,19 @@
Float oldPref = dataModel.getPreferenceValue(userID, itemID);
super.removePreference(userID, itemID);
if (oldPref != null) {
- double value = oldPref;
try {
buildAveragesLock.writeLock().lock();
RunningAverage itemAverage = itemAverages.get(itemID);
if (itemAverage == null) {
throw new IllegalStateException("No preferences exist for item ID: " + itemID);
}
- itemAverage.removeDatum(value);
+ itemAverage.removeDatum(oldPref);
RunningAverage userAverage = userAverages.get(userID);
if (userAverage == null) {
throw new IllegalStateException("No preferences exist for user ID: " + userID);
}
- userAverage.removeDatum(value);
- overallAveragePrefValue.removeDatum(value);
+ userAverage.removeDatum(oldPref);
+ overallAveragePrefValue.removeDatum(oldPref);
} finally {
buildAveragesLock.writeLock().unlock();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Tue Oct 20 14:04:55 2009
@@ -55,7 +55,7 @@
*/
public boolean addInstance(String correctLabel, ClassifierResult classifiedResult) {
boolean result = correctLabel.equals(classifiedResult.getLabel());
- if (result == true) {
+ if (result) {
correctlyClassified++;
} else {
incorrectlyClassified++;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java Tue Oct 20 14:04:55 2009
@@ -61,9 +61,9 @@
alpha_i = Double.valueOf(parameters.get("alpha_i", "1.0"));
}
- protected double thetaNormalizer = 1.0d;
+ protected double thetaNormalizer = 1.0;
- protected double alpha_i = 1.0d;
+ protected double alpha_i = 1.0;
@Override
public void initialize() throws InvalidDatastoreException {
config = new HBaseConfiguration(new Configuration());
@@ -154,7 +154,7 @@
}
protected double getCachedCell(String row, String family, String column) {
- Result r = null;
+ Result r;
if ((r = tableCache.get(row)) == null) {
Get g = new Get(Bytes.toBytes(row));
@@ -162,7 +162,7 @@
try {
r = table.get(g);
} catch (IOException e) {
- return 0.0d;
+ return 0.0;
}
tableCache.set(row, r);
}
@@ -170,7 +170,7 @@
Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
.toBytes(column));
if (value == null)
- return 0.0d;
+ return 0.0;
return Bytes.toDouble(value);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java Tue Oct 20 14:04:55 2009
@@ -38,9 +38,9 @@
Parameters params = null;
- protected double thetaNormalizer = 1.0d;
+ protected double thetaNormalizer = 1.0;
- protected double alpha_i = 1.0d;
+ protected double alpha_i = 1.0;
public InMemoryBayesDatastore(Parameters params) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java Tue Oct 20 14:04:55 2009
@@ -87,7 +87,7 @@
outputTuple.add(correctLabel);
outputTuple.add(classifiedLabel);
- output.collect(outputTuple, new DoubleWritable(1.0d));
+ output.collect(outputTuple, new DoubleWritable(1.0));
} catch (InvalidDatastoreException e) {
throw new IOException(e.toString());
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java Tue Oct 20 14:04:55 2009
@@ -18,6 +18,8 @@
import org.apache.mahout.classifier.bayes.common.BayesParameters;
+import java.io.IOException;
+
/**
* Implementors of this interface provide a way for running bayes training jobs on
* a hadoop cluster.
@@ -29,6 +31,7 @@
* @param input path to training documents.
* @param output path to output directory.
* */
- void runJob(final String input, final String output, final BayesParameters params) throws Exception;
+ void runJob(String input, String output, BayesParameters params)
+ throws IOException, ClassNotFoundException, InterruptedException;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java Tue Oct 20 14:04:55 2009
@@ -69,13 +69,13 @@
output.collect(key, new DoubleWritable(logIdf));
reporter.setStatus("Bayes TfIdf Mapper: log(Idf): " + key);
} else
- throw new RuntimeException("Unrecognized Tuple: " + key);
+ throw new IllegalArgumentException("Unrecognized Tuple: " + key);
} else if (key.length() == 2) {
if (key.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
output.collect(vocabCount, one);
reporter.setStatus("Bayes TfIdf Mapper: vocabCount");
} else
- throw new RuntimeException("Unexpected Tuple: " + key);
+ throw new IllegalArgumentException("Unexpected Tuple: " + key);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java Tue Oct 20 14:04:55 2009
@@ -85,17 +85,16 @@
String feature = key.stringAt(2);
if (useHbase) {
Put bu = new Put(Bytes.toBytes(feature));
- bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
- .toBytes(label), Bytes.toBytes(idfTimes_D_ij));
+ bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY),
+ Bytes.toBytes(label), Bytes.toBytes(idfTimes_D_ij));
table.put(bu);
}
}
- reporter
- .setStatus("Bayes TfIdf Reducer: " + key + " => " + idfTimes_D_ij);
+ reporter.setStatus("Bayes TfIdf Reducer: " + key + " => " + idfTimes_D_ij);
output.collect(key, new DoubleWritable(idfTimes_D_ij));
} else {
- throw new RuntimeException("Unexpected StringTuple: " + key);
+ throw new IllegalArgumentException("Unexpected StringTuple: " + key);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java Tue Oct 20 14:04:55 2009
@@ -64,7 +64,7 @@
else if(key.stringAt(0).equals(BayesConstants.LABEL_SUM))
return "Sigma_k/" + name;
else
- throw new RuntimeException("Unexpected StringTuple: " + key);
+ throw new IllegalArgumentException("Unexpected StringTuple: " + key);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java Tue Oct 20 14:04:55 2009
@@ -31,6 +31,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+
public class JobExecutor {
/** Logger for this class. */
private static final Logger log = LoggerFactory.getLogger(BayesDriver.class);
@@ -45,7 +47,8 @@
* @param job the job to execute.
* @throws Exception any exception thrown at job execution.
* */
- public static void execute(final String[] args, final BayesJob job) throws Exception {
+ public static void execute(final String[] args, final BayesJob job)
+ throws ClassNotFoundException, IOException, InterruptedException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletCluster.java Tue Oct 20 14:04:55 2009
@@ -75,7 +75,6 @@
}
/** Reads a typed Model instance from the input stream */
- @SuppressWarnings("unchecked")
public static Model<?> readModel(DataInput in) throws IOException {
String modelClassName = in.readUTF();
Model<?> model;
@@ -94,7 +93,6 @@
}
/** Writes a typed Model instance to the output stream */
- @SuppressWarnings("unchecked")
public static void writeModel(DataOutput out, Model<?> model) throws IOException {
out.writeUTF(model.getClass().getName());
model.write(out);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java Tue Oct 20 14:04:55 2009
@@ -61,7 +61,6 @@
state = getDirichletState(job);
}
- @SuppressWarnings("unchecked")
public static DirichletState<Vector> getDirichletState(JobConf job) {
String statePath = job.get(DirichletDriver.STATE_IN_KEY);
String modelFactory = job.get(DirichletDriver.MODEL_FACTORY_KEY);
@@ -79,11 +78,11 @@
job);
try {
Text key = new Text();
- DirichletCluster<Vector> cluster = new DirichletCluster();
+ DirichletCluster<Vector> cluster = new DirichletCluster<Vector>();
while (reader.next(key, cluster)) {
int index = Integer.parseInt(key.toString());
state.clusters.set(index, cluster);
- cluster = new DirichletCluster();
+ cluster = new DirichletCluster<Vector>();
}
} finally {
reader.close();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java Tue Oct 20 14:04:55 2009
@@ -37,7 +37,6 @@
public double offset; // alpha_0 / numClusters
- @SuppressWarnings("unchecked")
public DirichletState(ModelDistribution<O> modelFactory,
int numClusters, double alpha_0, int thin, int burnin) {
this.numClusters = numClusters;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonDirichletStateAdapter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonDirichletStateAdapter.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonDirichletStateAdapter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonDirichletStateAdapter.java Tue Oct 20 14:04:55 2009
@@ -35,7 +35,6 @@
import java.lang.reflect.Type;
import java.util.List;
-@SuppressWarnings("unchecked")
public class JsonDirichletStateAdapter implements
JsonSerializer<DirichletState<?>>, JsonDeserializer<DirichletState<?>> {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Tue Oct 20 14:04:55 2009
@@ -169,7 +169,7 @@
maxIterations = Integer.parseInt(cmdLine.getValue(maxIterationsOpt).toString());
}
- if (cmdLine.hasOption(overwriteOutput) == true) {
+ if (cmdLine.hasOption(overwriteOutput)) {
HadoopUtil.overwriteOutput(output);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java Tue Oct 20 14:04:55 2009
@@ -59,12 +59,12 @@
}
fs.mkdirs(outPath);
Path outFile = new Path(outPath, "part-randomSeed");
- if (fs.exists(outFile) == true) {
+ if (fs.exists(outFile)) {
log.warn("Deleting " + outFile);
fs.delete(outFile, false);
}
boolean newFile = fs.createNewFile(outFile);
- if (newFile == true) {
+ if (newFile) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(input), conf);
Writable key = (Writable) reader.getKeyClass().newInstance();
Vector value = (Vector) reader.getValueClass().newInstance();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Tue Oct 20 14:04:55 2009
@@ -38,8 +38,7 @@
JobConf conf = new JobConf(KMeansDriver.class);
Path outPath = new Path(output);
FileSystem fs = FileSystem.get(outPath.toUri(), conf);
- boolean exists = fs.exists(outPath);
- if (exists == true) {
+ if (fs.exists(outPath)) {
log.warn("Deleting " + outPath);
fs.delete(outPath, true);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java Tue Oct 20 14:04:55 2009
@@ -25,7 +25,6 @@
import java.util.Collections;
import java.util.List;
-import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
@@ -33,7 +32,7 @@
*
*
*/
-public class IntegerTuple implements Writable, WritableComparable<IntegerTuple> {
+public class IntegerTuple implements WritableComparable<IntegerTuple> {
private List<Integer> tuple = new ArrayList<Integer>();
@@ -78,7 +77,7 @@
* Replaces the string at the given index with the given newString
*
* @param index
- * @param newString
+ * @param newInteger
* @return The previous value at that location
*/
public Integer replaceAt(int index, Integer newInteger) {
@@ -106,7 +105,7 @@
@Override
public String toString() {
return tuple.toString();
- };
+ }
@Override
public int hashCode() {
@@ -150,14 +149,22 @@
@Override
public int compareTo(IntegerTuple otherTuple) {
- int min = Math.min(this.length(), otherTuple.length());
+ int thisLength = length();
+ int otherLength = otherTuple.length();
+ int min = Math.min(thisLength, otherLength);
for (int i = 0; i < min; i++) {
int ret = this.tuple.get(i).compareTo(otherTuple.integerAt(i));
if (ret == 0)
continue;
return ret;
}
- return this.length() - otherTuple.length();
+ if (thisLength < otherLength) {
+ return -1;
+ } else if (thisLength > otherLength) {
+ return 1;
+ } else {
+ return 0;
+ }
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java Tue Oct 20 14:04:55 2009
@@ -35,7 +35,7 @@
* operations that deals with the vectors (subset, count,...)
*
*/
-public class Data {
+public class Data implements Cloneable {
private final List<Instance> instances;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/partial/Step0Job.java Tue Oct 20 14:04:55 2009
@@ -49,7 +49,7 @@
* preparation step of the partial mapreduce builder. Computes some stats that
* will be used by the builder.
*/
-public class Step0Job {
+public class Step0Job implements Cloneable {
/** directory that will hold this job's output */
private final Path outputPath;
@@ -244,7 +244,7 @@
* Output of the step0's mappers
*
*/
- protected static class Step0Output implements Writable,
+ public static class Step0Output implements Writable,
Comparable<Step0Output> {
/**
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java Tue Oct 20 14:04:55 2009
@@ -30,7 +30,7 @@
* Used by various implementation to return the results of a build.<br>
* Contains a grown tree and and its oob predictions.
*/
-public class MapredOutput implements Writable {
+public class MapredOutput implements Writable, Cloneable {
private Node tree;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/TreeID.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/TreeID.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/TreeID.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/TreeID.java Tue Oct 20 14:04:55 2009
@@ -22,7 +22,7 @@
/**
* Indicates both the tree and the data partition used to grow the tree
*/
-public class TreeID extends LongWritable {
+public class TreeID extends LongWritable implements Cloneable {
public static final int MAX_TREEID = 100000;
public TreeID() {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java Tue Oct 20 14:04:55 2009
@@ -39,10 +39,9 @@
protected void reduce(Text key, Iterable<TopKStringPatterns> values,
Context context) throws IOException, InterruptedException {
TopKStringPatterns patterns = new TopKStringPatterns();
- Iterator<TopKStringPatterns> it = values.iterator();
- while (it.hasNext()) {
+ for (TopKStringPatterns value : values) {
context.setStatus("Aggregator Reducer: Selecting TopK patterns for: " + key);
- patterns = patterns.merge(it.next(), maxHeapSize);
+ patterns = patterns.merge(value, maxHeapSize);
}
context.write(key, patterns);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java Tue Oct 20 14:04:55 2009
@@ -57,10 +57,13 @@
*
*/
public class PFPGrowth {
- public static Pattern SPLITTER = Pattern.compile("[ ,\t]*[,|\t][ ,\t]*");
+ public static final Pattern SPLITTER = Pattern.compile("[ ,\t]*[,|\t][ ,\t]*");
private static final Logger log = LoggerFactory.getLogger(PFPGrowth.class);
+ private PFPGrowth() {
+ }
+
/**
* Generates the fList from the serialized string representation
*
@@ -73,8 +76,7 @@
public static List<Pair<String, Long>> deserializeList(Parameters params,
String key, Configuration conf) throws IOException {
List<Pair<String, Long>> list = new ArrayList<Pair<String, Long>>();
- conf
- .set(
+ conf.set(
"io.serializations",
"org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
@@ -258,12 +260,12 @@
Integer numGroups = Integer.valueOf(params.get("numGroups", "50"));
Map<String, Long> gList = new HashMap<String, Long>();
- long groupID = 0;
- long i = 0;
long maxPerGroup = fList.size() / numGroups;
if (fList.size() != maxPerGroup * numGroups)
- maxPerGroup = maxPerGroup + 1;
+ maxPerGroup++;
+ long i = 0;
+ long groupID = 0;
for (Pair<String, Long> featureFreq : fList) {
String feature = featureFreq.getFirst();
if (i / (maxPerGroup) == groupID) {
@@ -374,14 +376,12 @@
*/
private static String serializeList(List<Pair<String, Long>> list,
Configuration conf) throws IOException {
- conf
- .set(
+ conf.set(
"io.serializations",
"org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
DefaultStringifier<List<Pair<String, Long>>> listStringifier = new DefaultStringifier<List<Pair<String, Long>>>(
conf, GenericsUtil.getClass(list));
- String serializedListString = listStringifier.toString(list);
- return serializedListString;
+ return listStringifier.toString(list);
}
/**
@@ -394,8 +394,7 @@
*/
private static String serializeMap(Map<String, Long> map, Configuration conf)
throws IOException {
- conf
- .set(
+ conf.set(
"io.serializations",
"org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
DefaultStringifier<Map<String, Long>> mapStringifier = new DefaultStringifier<Map<String, Long>>(
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingReducer.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingReducer.java Tue Oct 20 14:04:55 2009
@@ -22,7 +22,6 @@
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
-import java.util.Iterator;
/**
* {@link ParallelCountingReducer} sums up the item count and output the item
@@ -32,13 +31,13 @@
public class ParallelCountingReducer extends
Reducer<Text, LongWritable, Text, LongWritable> {
+ @Override
protected void reduce(Text key, Iterable<LongWritable> values, Context context)
throws IOException, InterruptedException {
long sum = 0;
- Iterator<LongWritable> it = values.iterator();
- while (it.hasNext()) {
+ for (LongWritable value : values) {
context.setStatus("Parallel Counting Reducer :" + key);
- sum += it.next().get();
+ sum += value.get();
}
context.setStatus("Parallel Counting Reducer: " + key + " => " + sum);
context.write(key, new LongWritable(sum));
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java Tue Oct 20 14:04:55 2009
@@ -43,9 +43,9 @@
public class ParallelFPGrowthMapper extends
Mapper<LongWritable, Text, LongWritable, IntegerTuple> {
- private Map<String, Integer> fMap = new HashMap<String, Integer>();
+ private final Map<String, Integer> fMap = new HashMap<String, Integer>();
- private Map<Integer, Long> gListInt = new HashMap<Integer, Long>();
+ private final Map<Integer, Long> gListInt = new HashMap<Integer, Long>();
private Pattern splitter = null;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java Tue Oct 20 14:04:55 2009
@@ -48,13 +48,13 @@
public class ParallelFPGrowthReducer extends
Reducer<LongWritable, IntegerTuple, Text, TopKStringPatterns> {
- private List<Pair<Integer, Long>> fList = new ArrayList<Pair<Integer, Long>>();
+ private final List<Pair<Integer, Long>> fList = new ArrayList<Pair<Integer, Long>>();
- private List<String> featureReverseMap = new ArrayList<String>();
+ private final List<String> featureReverseMap = new ArrayList<String>();
- private Map<String, Integer> fMap = new HashMap<String, Integer>();
+ private final Map<String, Integer> fMap = new HashMap<String, Integer>();
- private Map<Long, List<Integer>> groupFeatures = new HashMap<Long, List<Integer>>();
+ private final Map<Long, List<Integer>> groupFeatures = new HashMap<Long, List<Integer>>();
private int maxHeapSize = 50;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ContextWriteOutputCollector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ContextWriteOutputCollector.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ContextWriteOutputCollector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ContextWriteOutputCollector.java Tue Oct 20 14:04:55 2009
@@ -31,10 +31,9 @@
private static final Logger log = LoggerFactory
.getLogger(ContextWriteOutputCollector.class);
- private Reducer<IK, IV, K, V>.Context context;
+ private final Reducer<IK, IV, K, V>.Context context;
- public ContextWriteOutputCollector(Reducer<IK, IV, K, V>.Context context)
- throws IOException {
+ public ContextWriteOutputCollector(Reducer<IK, IV, K, V>.Context context) {
this.context = context;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/SequenceFileOutputCollector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/SequenceFileOutputCollector.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/SequenceFileOutputCollector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/SequenceFileOutputCollector.java Tue Oct 20 14:04:55 2009
@@ -25,10 +25,9 @@
public class SequenceFileOutputCollector<K extends Writable, V extends Writable>
implements OutputCollector<K, V> {
- private SequenceFile.Writer writer;
+ private final SequenceFile.Writer writer;
- public SequenceFileOutputCollector(SequenceFile.Writer writer)
- throws IOException {
+ public SequenceFileOutputCollector(SequenceFile.Writer writer) {
this.writer = writer;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TopKPatternsOutputConvertor.java Tue Oct 20 14:04:55 2009
@@ -26,30 +26,30 @@
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FrequentPatternMaxHeap;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.Pattern;
-public final class TopKPatternsOutputConvertor<AttributePrimitive> implements
+public final class TopKPatternsOutputConvertor<A> implements
OutputCollector<Integer, FrequentPatternMaxHeap> {
- private OutputCollector<AttributePrimitive, List<Pair<List<AttributePrimitive>, Long>>> collector = null;
+ private OutputCollector<A, List<Pair<List<A>, Long>>> collector = null;
- private Map<Integer, AttributePrimitive> reverseMapping = null;
+ private Map<Integer, A> reverseMapping = null;
public TopKPatternsOutputConvertor(
- OutputCollector<AttributePrimitive, List<Pair<List<AttributePrimitive>, Long>>> collector,
- Map<Integer, AttributePrimitive> reverseMapping) {
+ OutputCollector<A, List<Pair<List<A>, Long>>> collector,
+ Map<Integer, A> reverseMapping) {
this.collector = collector;
this.reverseMapping = reverseMapping;
}
@Override
- public final void collect(Integer key, FrequentPatternMaxHeap value)
+ public void collect(Integer key, FrequentPatternMaxHeap value)
throws IOException {
- List<Pair<List<AttributePrimitive>, Long>> perAttributePatterns = new ArrayList<Pair<List<AttributePrimitive>, Long>>();
+ List<Pair<List<A>, Long>> perAttributePatterns = new ArrayList<Pair<List<A>, Long>>();
for (Pattern itemSet : value.getHeap()) {
- List<AttributePrimitive> frequentPattern = new ArrayList<AttributePrimitive>();
+ List<A> frequentPattern = new ArrayList<A>();
for (int j = 0; j < itemSet.length(); j++) {
frequentPattern.add(reverseMapping.get(itemSet.getPattern()[j]));
}
- Pair<List<AttributePrimitive>, Long> returnItemSet = new Pair<List<AttributePrimitive>, Long>(
+ Pair<List<A>, Long> returnItemSet = new Pair<List<A>, Long>(
frequentPattern, itemSet.support());
perAttributePatterns.add(returnItemSet);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java Tue Oct 20 14:04:55 2009
@@ -21,15 +21,15 @@
import java.util.List;
import java.util.Map;
-public class TransactionIterator<AttributePrimitive> implements Iterator<int[]> {
- private Map<AttributePrimitive, Integer> attributeIdMapping = null;
+public class TransactionIterator<AP> implements Iterator<int[]> {
+ private Map<AP, Integer> attributeIdMapping = null;
- private Iterator<List<AttributePrimitive>> iterator = null;
+ private Iterator<List<AP>> iterator = null;
private int[] transactionBuffer = null;
- public TransactionIterator(Iterator<List<AttributePrimitive>> iterator,
- Map<AttributePrimitive, Integer> attributeIdMapping) {
+ public TransactionIterator(Iterator<List<AP>> iterator,
+ Map<AP, Integer> attributeIdMapping) {
this.attributeIdMapping = attributeIdMapping;
this.iterator = iterator;
transactionBuffer = new int[attributeIdMapping.size()];
@@ -42,9 +42,9 @@
@Override
public final int[] next() {
- List<AttributePrimitive> transaction = iterator.next();
+ List<AP> transaction = iterator.next();
int index = 0;
- for (AttributePrimitive Attribute : transaction) {
+ for (AP Attribute : transaction) {
if (attributeIdMapping.containsKey(Attribute)) {
transactionBuffer[index++] = attributeIdMapping.get(Attribute);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerStringOutputConvertor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerStringOutputConvertor.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerStringOutputConvertor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerStringOutputConvertor.java Tue Oct 20 14:04:55 2009
@@ -40,7 +40,7 @@
}
@Override
- public final void collect(Integer key, List<Pair<List<Integer>, Long>> value)
+ public void collect(Integer key, List<Pair<List<Integer>, Long>> value)
throws IOException {
String StringKey = featureReverseMap.get(key);
List<Pair<List<String>, Long>> stringValues = new ArrayList<Pair<List<String>, Long>>();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerTupleIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerTupleIterator.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerTupleIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/IntegerTupleIterator.java Tue Oct 20 14:04:55 2009
@@ -31,18 +31,18 @@
}
@Override
- public final boolean hasNext() {
+ public boolean hasNext() {
return iterator.hasNext();
}
@Override
- public final List<Integer> next() {
+ public List<Integer> next() {
IntegerTuple transaction = iterator.next();
return transaction.getEntries();
}
@Override
- public final void remove() {
+ public void remove() {
iterator.remove();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringOutputConvertor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringOutputConvertor.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringOutputConvertor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringOutputConvertor.java Tue Oct 20 14:04:55 2009
@@ -34,7 +34,7 @@
}
@Override
- public final void collect(String key, List<Pair<List<String>, Long>> value)
+ public void collect(String key, List<Pair<List<String>, Long>> value)
throws IOException {
collector.collect(new Text(key), new TopKStringPatterns(value));
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringTupleIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringTupleIterator.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringTupleIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/StringTupleIterator.java Tue Oct 20 14:04:55 2009
@@ -31,18 +31,18 @@
}
@Override
- public final boolean hasNext() {
+ public boolean hasNext() {
return iterator.hasNext();
}
@Override
- public final List<String> next() {
+ public List<String> next() {
StringTuple transaction = iterator.next();
return transaction.getEntries();
}
@Override
- public final void remove() {
+ public void remove() {
iterator.remove();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/TopKStringPatterns.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/TopKStringPatterns.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/TopKStringPatterns.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/TopKStringPatterns.java Tue Oct 20 14:04:55 2009
@@ -40,15 +40,15 @@
frequentPatterns.addAll(patterns);
}
- public final Iterator<Pair<List<String>, Long>> iterator() {
+ public Iterator<Pair<List<String>, Long>> iterator() {
return frequentPatterns.iterator();
}
- public final List<Pair<List<String>, Long>> getPatterns() {
+ public List<Pair<List<String>, Long>> getPatterns() {
return frequentPatterns;
}
- public final TopKStringPatterns merge(TopKStringPatterns pattern, int heapSize) {
+ public TopKStringPatterns merge(TopKStringPatterns pattern, int heapSize) {
List<Pair<List<String>, Long>> patterns = new ArrayList<Pair<List<String>, Long>>();
Iterator<Pair<List<String>, Long>> myIterator = frequentPatterns.iterator();
Iterator<Pair<List<String>, Long>> otherIterator = pattern.iterator();
@@ -67,9 +67,9 @@
for (int j = 0; j < myItem.getFirst().size(); j++) {
cmp = myItem.getFirst().get(j).compareTo(
otherItem.getFirst().get(j));
- if (cmp == 0)
- continue;
- else break;
+ if (cmp != 0) {
+ break;
+ }
}
}
}
@@ -96,7 +96,7 @@
}
@Override
- public final void readFields(DataInput in) throws IOException {
+ public void readFields(DataInput in) throws IOException {
frequentPatterns.clear();
int length = in.readInt();
for (int i = 0; i < length; i++) {
@@ -114,7 +114,7 @@
}
@Override
- public final void write(DataOutput out) throws IOException {
+ public void write(DataOutput out) throws IOException {
out.writeInt(frequentPatterns.size());
for (Pair<List<String>, Long> pattern : frequentPatterns) {
out.writeInt(pattern.getFirst().size());
@@ -128,7 +128,7 @@
}
@Override
- public final String toString() {
+ public String toString() {
StringBuilder sb = new StringBuilder();
String sep = "";
for (Pair<List<String>, Long> pattern : frequentPatterns) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Tue Oct 20 14:04:55 2009
@@ -28,7 +28,6 @@
import java.util.List;
import java.util.ArrayList;
import java.util.Set;
-import java.util.TreeSet;
import java.util.Map.Entry;
import org.apache.commons.lang.mutable.MutableLong;
import org.apache.hadoop.conf.Configuration;
@@ -47,9 +46,9 @@
/**
* PFPGrowth Class has both vanilla FPGrowth and Top K FPGrowth
*
- * @param <AttributePrimitive>
+ * @param <A>
*/
-public class FPGrowth<AttributePrimitive extends Comparable<? super AttributePrimitive>> {
+public class FPGrowth<A extends Comparable<? super A>> {
private static final Logger log = LoggerFactory.getLogger(FPGrowth.class);
@@ -77,31 +76,30 @@
* @return the List of features and their associated frequency as a Pair
* @throws IOException
*/
- public final List<Pair<AttributePrimitive, Long>> generateFList(
- Iterator<List<AttributePrimitive>> transactions, int minSupport)
- throws IOException {
+ public final List<Pair<A, Long>> generateFList(
+ Iterator<List<A>> transactions, int minSupport) {
- final Map<AttributePrimitive, MutableLong> AttributeSupport = new HashMap<AttributePrimitive, MutableLong>();
- int count = 0;
+ Map<A, MutableLong> AttributeSupport = new HashMap<A, MutableLong>();
+ //int count = 0;
while (transactions.hasNext()) {
- List<AttributePrimitive> transaction = transactions.next();
- for (AttributePrimitive attribute : transaction) {
+ List<A> transaction = transactions.next();
+ for (A attribute : transaction) {
if (AttributeSupport.containsKey(attribute) == false)
AttributeSupport.put(attribute, new MutableLong(1));
else
AttributeSupport.get(attribute).increment();
- count++;
+ //count++;
}
}
- List<Pair<AttributePrimitive, Long>> fList = new ArrayList<Pair<AttributePrimitive, Long>>();
- for (Entry<AttributePrimitive, MutableLong> e : AttributeSupport.entrySet())
- fList.add(new Pair<AttributePrimitive, Long>(e.getKey(), e.getValue()
+ List<Pair<A, Long>> fList = new ArrayList<Pair<A, Long>>();
+ for (Entry<A, MutableLong> e : AttributeSupport.entrySet())
+ fList.add(new Pair<A, Long>(e.getKey(), e.getValue()
.longValue()));
- Collections.sort(fList, new Comparator<Pair<AttributePrimitive, Long>>() {
+ Collections.sort(fList, new Comparator<Pair<A, Long>>() {
@Override
- public int compare(Pair<AttributePrimitive, Long> o1,
- Pair<AttributePrimitive, Long> o2) {
+ public int compare(Pair<A, Long> o1,
+ Pair<A, Long> o2) {
int ret = o2.getSecond().compareTo(o1.getSecond());
if (ret != 0)
return ret;
@@ -129,42 +127,41 @@
* @throws IOException
*/
public final void generateTopKFrequentPatterns(
- Iterator<List<AttributePrimitive>> transactionStream,
- List<Pair<AttributePrimitive, Long>> frequencyList,
+ Iterator<List<A>> transactionStream,
+ List<Pair<A, Long>> frequencyList,
long minSupport,
int K,
- Set<AttributePrimitive> returnableFeatures,
- OutputCollector<AttributePrimitive, List<Pair<List<AttributePrimitive>, Long>>> output)
+ Set<A> returnableFeatures,
+ OutputCollector<A, List<Pair<List<A>, Long>>> output)
throws IOException {
- Map<Integer, AttributePrimitive> reverseMapping = new HashMap<Integer, AttributePrimitive>();
- Map<AttributePrimitive, Integer> attributeIdMapping = new HashMap<AttributePrimitive, Integer>();
+ Map<Integer, A> reverseMapping = new HashMap<Integer, A>();
+ Map<A, Integer> attributeIdMapping = new HashMap<A, Integer>();
int id = 0;
- for (Pair<AttributePrimitive, Long> feature : frequencyList) {
- AttributePrimitive attrib = feature.getFirst();
+ for (Pair<A, Long> feature : frequencyList) {
+ A attrib = feature.getFirst();
Long frequency = feature.getSecond();
- if (frequency.longValue() < minSupport)
+ if (frequency < minSupport)
continue;
- attributeIdMapping.put(attrib, Integer.valueOf(id));
- reverseMapping.put(Integer.valueOf(id++), attrib);
+ attributeIdMapping.put(attrib, id);
+ reverseMapping.put(id++, attrib);
}
- final long[] attributeFrequency = new long[attributeIdMapping.size()];
- for (Pair<AttributePrimitive, Long> feature : frequencyList) {
- AttributePrimitive attrib = feature.getFirst();
+ long[] attributeFrequency = new long[attributeIdMapping.size()];
+ for (Pair<A, Long> feature : frequencyList) {
+ A attrib = feature.getFirst();
Long frequency = feature.getSecond();
- if (frequency.longValue() < minSupport)
+ if (frequency < minSupport)
break;
- attributeFrequency[attributeIdMapping.get(attrib)] = frequency
- .longValue();
+ attributeFrequency[attributeIdMapping.get(attrib)] = frequency;
}
log.info("Number of unique items {}", frequencyList.size());
Set<Integer> returnFeatures = new HashSet<Integer>();
if (returnableFeatures.isEmpty() == false) {
- for (AttributePrimitive attrib : returnableFeatures) {
+ for (A attrib : returnableFeatures) {
if (attributeIdMapping.containsKey(attrib)) {
returnFeatures.add(attributeIdMapping.get(attrib));
log.info("Adding Pattern {}=>{}", attrib, attributeIdMapping
@@ -177,10 +174,10 @@
}
log.info("Number of unique pruned items {}", attributeIdMapping.size());
- generateTopKFrequentPatterns(new TransactionIterator<AttributePrimitive>(
+ generateTopKFrequentPatterns(new TransactionIterator<A>(
transactionStream, attributeIdMapping), attributeFrequency, minSupport,
K, reverseMapping.size(), returnFeatures,
- new TopKPatternsOutputConvertor<AttributePrimitive>(output,
+ new TopKPatternsOutputConvertor<A>(output,
reverseMapping));
}
@@ -188,18 +185,18 @@
/**
* Top K FpGrowth Algorithm
*
- * @param TransactionTree to be mined
+ * @param tree to be mined
* @param minSupportMutable minimum support of the pattern to keep
* @param K Number of top frequent patterns to keep
* @param requiredFeatures Set of integer id's of features to mine
* @param outputCollector the Collector class which converts the given
- * frequent pattern in integer to AttributePrimitive
+ * frequent pattern in integer to A
* @return Top K Frequent Patterns for each feature and their support
* @throws IOException
*/
- private final Map<Integer, FrequentPatternMaxHeap> fpGrowth(FPTree tree,
+ private Map<Integer, FrequentPatternMaxHeap> fpGrowth(FPTree tree,
MutableLong minSupportMutable, int K, Set<Integer> requiredFeatures,
- TopKPatternsOutputConvertor<AttributePrimitive> outputCollector)
+ TopKPatternsOutputConvertor<A> outputCollector)
throws IOException {
int minSupportValue = minSupportMutable.intValue();
@@ -226,26 +223,22 @@
return Patterns;
}
- private FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree, int K,
+ private static FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree, int K,
MutableLong minSupportMutable) {
FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(K);
int tempNode = FPTree.ROOTNODEID;
Pattern frequentItem = new Pattern();
- while (true) {
- if (tree.childCount(tempNode) == 0) {
- break;
- } else {
- if (tree.childCount(tempNode) > 1) {
- log.info("This should not happen {} {}", tree.childCount(tempNode),
- tempNode);
- }
- tempNode = tree.childAtIndex(tempNode, 0);
- if (tree.count(tempNode) < minSupportMutable.intValue()) {
- continue;
- }
- frequentItem.add(tree.attribute(tempNode), tree.count(tempNode));
+ while (tree.childCount(tempNode) != 0) {
+ if (tree.childCount(tempNode) > 1) {
+ log.info("This should not happen {} {}", tree.childCount(tempNode),
+ tempNode);
+ }
+ tempNode = tree.childAtIndex(tempNode, 0);
+ if (tree.count(tempNode) < minSupportMutable.intValue()) {
+ continue;
}
+ frequentItem.add(tree.attribute(tempNode), tree.count(tempNode));
}
if (frequentItem.length() > 0) {
frequentPatterns.insert(frequentItem);
@@ -256,7 +249,7 @@
/**
* Internal TopKFrequentPattern Generation algorithm, which represents the
- * AttributePrimitives as integers and transforms features to use only
+ * A's as integers and transforms features to use only
* integers
*
* @param transactions Transaction database Iterator
@@ -269,18 +262,18 @@
* have to be mined
* @param topKPatternsOutputCollector the outputCollector which transforms the
* given Pattern in integer format to the corresponding
- * AttributePrimitive Format
+ * A Format
* @return Top K frequent patterns for each attribute
* @throws IOException
*/
- private final Map<Integer, FrequentPatternMaxHeap> generateTopKFrequentPatterns(
+ private Map<Integer, FrequentPatternMaxHeap> generateTopKFrequentPatterns(
Iterator<int[]> transactions,
long[] attributeFrequency,
long minSupport,
int K,
int featureSetSize,
Set<Integer> returnFeatures,
- TopKPatternsOutputConvertor<AttributePrimitive> topKPatternsOutputCollector)
+ TopKPatternsOutputConvertor<A> topKPatternsOutputCollector)
throws IOException {
FPTree tree = new FPTree(featureSetSize);
@@ -291,12 +284,12 @@
// Constructing initial FPTree from the list of transactions
MutableLong minSupportMutable = new MutableLong(minSupport);
int nodecount = 0;
- int attribcount = 0;
+ //int attribcount = 0;
int i = 0;
while (transactions.hasNext()) {
int[] transaction = transactions.next();
Arrays.sort(transaction);
- attribcount += transaction.length;
+ //attribcount += transaction.length;
nodecount += treeAddCount(tree, transaction, 1, minSupportMutable,
attributeFrequency);
i++;
@@ -311,18 +304,19 @@
topKPatternsOutputCollector);
}
- private final FrequentPatternMaxHeap growth(FPTree tree,
+ private FrequentPatternMaxHeap growth(FPTree tree,
MutableLong minSupportMutable, int K, FPTreeDepthCache treeCache,
int level, int currentAttribute) {
FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(K);
- FrequentPatternMaxHeap returnedPatterns = null;
int i = Arrays.binarySearch(tree.getHeaderTableAttributes(),
currentAttribute);
if (i < 0)
return frequentPatterns;
- int j = tree.getHeaderTableCount();
- while (i < j) {
+
+ FrequentPatternMaxHeap returnedPatterns;
+ int headerTableCount = tree.getHeaderTableCount();
+ while (i < headerTableCount) {
int attribute = tree.getAttributeAtIndex(i);
long count = tree.getHeaderSupportCount(attribute);
if (count < minSupportMutable.intValue())
@@ -358,11 +352,10 @@
return frequentPatterns;
}
- private final FrequentPatternMaxHeap growthBottomUp(FPTree tree,
+ private static FrequentPatternMaxHeap growthBottomUp(FPTree tree,
MutableLong minSupportMutable, int K, FPTreeDepthCache treeCache,
int level, boolean conditionalOfCurrentAttribute, int currentAttribute) {
FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(K);
- FrequentPatternMaxHeap returnedPatterns = null;
if (conditionalOfCurrentAttribute == false) {
int index = Arrays.binarySearch(tree.getHeaderTableAttributes(),
currentAttribute);
@@ -378,6 +371,7 @@
if (tree.singlePath()) {
return generateSinglePathPatterns(tree, K, minSupportMutable);
}
+ FrequentPatternMaxHeap returnedPatterns;
for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) {
int attribute = tree.getAttributeAtIndex(i);
long count = tree.getHeaderSupportCount(attribute);
@@ -385,7 +379,7 @@
continue;
FPTree conditionalTree = treeCache.getTree(level);
- if (conditionalOfCurrentAttribute == true) {
+ if (conditionalOfCurrentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
minSupportMutable, conditionalTree, tree);
returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
@@ -422,11 +416,10 @@
return frequentPatterns;
}
- private final FrequentPatternMaxHeap growthTopDown(FPTree tree,
+ private FrequentPatternMaxHeap growthTopDown(FPTree tree,
MutableLong minSupportMutable, int K, FPTreeDepthCache treeCache,
int level, boolean conditionalOfCurrentAttribute, int currentAttribute) {
FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(K);
- FrequentPatternMaxHeap returnedPatterns = null;
if (conditionalOfCurrentAttribute == false) {
int index = Arrays.binarySearch(tree.getHeaderTableAttributes(),
currentAttribute);
@@ -442,6 +435,7 @@
if (tree.singlePath()) {
return generateSinglePathPatterns(tree, K, minSupportMutable);
}
+ FrequentPatternMaxHeap returnedPatterns;
for (int i = 0; i < tree.getHeaderTableCount(); i++) {
int attribute = tree.getAttributeAtIndex(i);
long count = tree.getHeaderSupportCount(attribute);
@@ -450,7 +444,7 @@
FPTree conditionalTree = treeCache.getTree(level);
- if (conditionalOfCurrentAttribute == true) {
+ if (conditionalOfCurrentAttribute) {
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
minSupportMutable, conditionalTree, tree);
returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
@@ -487,16 +481,14 @@
return frequentPatterns;
}
- private final FrequentPatternMaxHeap mergeHeap(
+ private static FrequentPatternMaxHeap mergeHeap(
FrequentPatternMaxHeap frequentPatterns,
FrequentPatternMaxHeap returnedPatterns, int attribute, long count,
boolean addAttribute, boolean subPatternCheck) {
- TreeSet<Pattern> myHeap = returnedPatterns.getHeap();
- Iterator<Pattern> it = myHeap.iterator();
+ Set<Pattern> myHeap = returnedPatterns.getHeap();
- while (it.hasNext()) {
- Pattern pattern = it.next();
+ for (Pattern pattern : myHeap) {
long support = Math.min(count, pattern.support());
if (frequentPatterns.addable(support)) {
pattern.add(attribute, count);
@@ -511,7 +503,7 @@
return frequentPatterns;
}
- private void traverseAndBuildConditionalFPTreeData(int firstConditionalNode,
+ private static void traverseAndBuildConditionalFPTreeData(int firstConditionalNode,
MutableLong minSupportMutable, FPTree conditionalTree, FPTree tree) {
// Build Subtable
@@ -522,7 +514,7 @@
int pathNode = tree.parent(conditionalNode);
int prevConditional = -1;
- while (0 != pathNode) { // dummy root node
+ while (pathNode != 0) { // dummy root node
int attribute = tree.attribute(pathNode);
if (tree.getHeaderSupportCount(attribute) < minSupportMutable
.intValue()) {
@@ -573,7 +565,7 @@
* Create FPTree with node counts incremented by addCount variable given the
* root node and the List of Attributes in transaction sorted by support
*
- * @param TransactionTree object to which the transaction has to be added to
+ * @param tree object to which the transaction has to be added to
* @param myList List of transactions sorted by support
* @param addCount amount by which the Node count has to be incremented
* @param minSupport the MutableLong value which contains the current
@@ -581,15 +573,15 @@
* @param attributeFrequency the list of attributes and their frequency
* @return the number of new nodes added
*/
- private final int treeAddCount(FPTree tree, int[] myList, int addCount,
+ private static int treeAddCount(FPTree tree, int[] myList, int addCount,
MutableLong minSupport, long[] attributeFrequency) {
int temp = FPTree.ROOTNODEID;
int ret = 0;
boolean addCountMode = true;
- int child = -1;
for (int attribute : myList) {
if (attributeFrequency[attribute] < minSupport.intValue())
return ret;
+ int child;
if (addCountMode) {
child = tree.childWithAttribute(temp, attribute);
if (child == -1) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java Tue Oct 20 14:04:55 2009
@@ -21,21 +21,21 @@
public class FPTree {
- public final static int DEFAULT_CHILDREN_INITIAL_SIZE = 2;
+ private static final int DEFAULT_CHILDREN_INITIAL_SIZE = 2;
- public final static int DEFAULT_HEADER_TABLE_INITIAL_SIZE = 4;
+ private static final int DEFAULT_HEADER_TABLE_INITIAL_SIZE = 4;
- public final static int DEFAULT_INITIAL_SIZE = 8;
+ private static final int DEFAULT_INITIAL_SIZE = 8;
- public final static int ROOTNODEID = 0;
+ public static final int ROOTNODEID = 0;
- private final static int HEADERTABLEBLOCKSIZE = 2;
+ private static final int HEADERTABLEBLOCKSIZE = 2;
- private final static int HT_LAST = 1;
+ private static final int HT_LAST = 1;
- private final static int HT_NEXT = 0;
+ private static final int HT_NEXT = 0;
- public final float GROWTH_RATE = 1.5f;
+ private static final float GROWTH_RATE = 1.5f;
private int[] attribute;
@@ -73,7 +73,7 @@
this(size, DEFAULT_HEADER_TABLE_INITIAL_SIZE);
}
- public FPTree(int size, int headersize) {
+ private FPTree(int size, int headersize) {
if (size < DEFAULT_INITIAL_SIZE)
size = DEFAULT_INITIAL_SIZE;
@@ -307,7 +307,7 @@
return singlePath;
}
- private final int getHeaderIndex(int attribute) {
+ private int getHeaderIndex(int attribute) {
if (attribute >= headerTableLookup.length)
resizeHeaderLookup(attribute);
int index = headerTableLookup[attribute];
@@ -326,7 +326,7 @@
return index;
}
- private final void resize() {
+ private void resize() {
int size = (int) (GROWTH_RATE * nodes);
if (size < DEFAULT_INITIAL_SIZE)
size = DEFAULT_INITIAL_SIZE;
@@ -357,7 +357,7 @@
System.arraycopy(oldConditional, 0, this.conditional, 0, nodes);
}
- private final void resizeChildren(int nodeId) {
+ private void resizeChildren(int nodeId) {
int length = childCount[nodeId];
int size = (int) (GROWTH_RATE * (length));
if (size < DEFAULT_CHILDREN_INITIAL_SIZE)
@@ -367,7 +367,7 @@
System.arraycopy(oldNodeChildren, 0, this.nodeChildren[nodeId], 0, length);
}
- private final void resizeHeaderLookup(int attribute) {
+ private void resizeHeaderLookup(int attribute) {
int size = (int) (attribute * GROWTH_RATE);
int[] oldLookup = headerTableLookup;
headerTableLookup = new int[size];
@@ -375,7 +375,7 @@
System.arraycopy(oldLookup, 0, this.headerTableLookup, 0, oldLookup.length);
}
- private final void resizeHeaderTable() {
+ private void resizeHeaderTable() {
int size = (int) (GROWTH_RATE * (headerTableCount));
if (size < DEFAULT_HEADER_TABLE_INITIAL_SIZE)
size = DEFAULT_HEADER_TABLE_INITIAL_SIZE;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java Tue Oct 20 14:04:55 2009
@@ -45,7 +45,7 @@
private int misses = 0;
public final FPTree getFirstLevelTree(int attr) {
- Integer attribute = Integer.valueOf(attr);
+ Integer attribute = attr;
if (firstLevelCache.contains(attribute)) {
hits++;
return firstLevelCache.get(attribute);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java Tue Oct 20 14:04:55 2009
@@ -28,9 +28,9 @@
*
*/
public class FrequentPatternMaxHeap {
- private Comparator<Pattern> treeSetComparator = new Comparator<Pattern>() {
+ private final Comparator<Pattern> treeSetComparator = new Comparator<Pattern>() {
@Override
- public final int compare(Pattern cr1, Pattern cr2) {
+ public int compare(Pattern cr1, Pattern cr2) {
long support2 = cr2.support();
long support1 = cr1.support();
int length2 = cr2.length();
@@ -68,9 +68,7 @@
public final boolean addable(long support) {
if (count < maxSize)
return true;
- if (least.support() > support)
- return false;
- return true;
+ return least.support() <= support;
}
public final TreeSet<Pattern> getHeap() {
@@ -104,8 +102,6 @@
int cmp = treeSetComparator.compare(least, frequentPattern);
if (cmp < 0)
least = frequentPattern;
- else if (cmp == 0)
- return;
} else if (least == null)
least = frequentPattern;
}
@@ -131,13 +127,13 @@
return super.toString();
}
- final private boolean addPattern(Pattern frequentPattern,
+ private boolean addPattern(Pattern frequentPattern,
boolean subPatternCheck) {
if (subPatternCheck == false) {
set.add(frequentPattern);
return true;
} else {
- Long index = Long.valueOf(frequentPattern.support());
+ Long index = frequentPattern.support();
if (patternIndex.containsKey(index)) {
Set<Pattern> indexSet = patternIndex.get(index);
boolean replace = false;
@@ -168,7 +164,7 @@
return true;
} else {
set.add(frequentPattern);
- Set<Pattern> patternList = null;
+ Set<Pattern> patternList;
if (patternIndex.containsKey(index) == false) {
patternList = new HashSet<Pattern>();
patternIndex.put(index, patternList);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java Tue Oct 20 14:04:55 2009
@@ -21,9 +21,9 @@
public class Pattern {
- public static int DEFAULT_INITIAL_SIZE = 2;
+ private static final int DEFAULT_INITIAL_SIZE = 2;
- public static float GROWTH_RATE = 1.5f;
+ private static final float GROWTH_RATE = 1.5f;
private boolean dirty = true;
@@ -41,7 +41,7 @@
this(DEFAULT_INITIAL_SIZE);
}
- public Pattern(int size) {
+ private Pattern(int size) {
if (size < DEFAULT_INITIAL_SIZE)
size = DEFAULT_INITIAL_SIZE;
this.pattern = new int[size];
@@ -86,10 +86,10 @@
@Override
public int hashCode() {
- final int prime = 31;
if (dirty == false)
return hashCode;
int result = 1;
+ int prime = 31;
result = prime * result + Arrays.hashCode(pattern);
result = prime * result + Long.valueOf(support).hashCode();
hashCode = result;
@@ -99,24 +99,20 @@
public final boolean isSubPatternOf(Pattern frequentPattern) {
int[] otherPattern = frequentPattern.getPattern();
int otherLength = frequentPattern.length();
- int otherI = 0;
- int i = 0;
if (this.length() > frequentPattern.length())
return false;
+ int i = 0;
+ int otherI = 0;
while (i < length && otherI < otherLength) {
if (otherPattern[otherI] == pattern[i]) {
otherI++;
i++;
- continue;
} else if (otherPattern[otherI] < pattern[i]) {
otherI++;
} else
return false;
}
- if (otherI == otherLength && i != length)
- return false;
- return true;
-
+ return otherI != otherLength || i == length;
}
public final int length() {
@@ -131,10 +127,10 @@
public final String toString() {
int[] arr = new int[length];
System.arraycopy(pattern, 0, arr, 0, length);
- return Arrays.toString(arr) + "-" + support;
+ return Arrays.toString(arr) + '-' + support;
}
- private final void resize() {
+ private void resize() {
int size = (int) (GROWTH_RATE * length);
if (size < DEFAULT_INITIAL_SIZE)
size = DEFAULT_INITIAL_SIZE;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package.html
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package.html?rev=827468&r1=827467&r2=827468&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package.html (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package.html Tue Oct 20 14:04:55 2009
@@ -36,8 +36,8 @@
<br>
e.g:
<pre>
- FPGrowth<String> fp = new FPGrowth<String>();
- Set<String> features = new HashSet<String>();
+ FPGrowth&lt;String&gt; fp = new FPGrowth&lt;String&gt;();
+ Set&lt;String&gt; features = new HashSet&lt;String&gt;();
fp.generateTopKStringFrequentPatterns(
new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
fp.generateFList(
@@ -45,7 +45,7 @@
minSupport,
maxHeapSize,
features,
- new StringOutputConvertor(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer))
+ new StringOutputConvertor(new SequenceFileOutputCollector&lt;Text, TopKStringPatterns&gt;(writer))
);
</pre>
<ul>
@@ -59,7 +59,7 @@
<p>The command line launcher for string transaction data <i>org.apache.mahout.fpm.pfpgrowth.FPGrowthJob</i> has other features including specifying the regex pattern for spitting a string line of a transaction into the constituent features</p>
The <i>numGroups</i> parameter in FPGrowthJob specifies the number of groups into which transactions have to be decomposed.
-The <i>numTreeCacheEntries</i> parameter specifies the number of generated conditional FP-Trees to be kept in memory so as not to regenerate them. Increasing this number increases the memory consumption but might improve speed until a certain point. This depends entirely on the dataset in question. A value of 5-10 is recommended for mining upto top 100 patterns for each feature
+The <i>numTreeCacheEntries</i> parameter specifies the number of generated conditional FP-Trees to be kept in memory so as not to regenerate them. Increasing this number increases the memory consumption but might improve speed until a certain point. This depends entirely on the dataset in question. A value of 5-10 is recommended for mining up to top 100 patterns for each feature
</DIV>
<DIV> </DIV>
<DIV align="center">