You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@mahout.apache.org by Robin Anil <ro...@gmail.com> on 2009/12/15 19:12:59 UTC
Re: svn commit: r890899 [1/2] - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/
main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/ main/
Please don't format the pfpgrowth package. It breaks all the new changes
and code implementations, especially the fp-bonsai. I have rejected the
changes for now, because it had become unmergeable. I will post a
new diff soon.
On Tue, Dec 15, 2009 at 11:09 PM, <sr...@apache.org> wrote:
> Author: srowen
> Date: Tue Dec 15 17:39:18 2009
> New Revision: 890899
>
> URL: http://svn.apache.org/viewvc?rev=890899&view=rev
> Log:
> More style stuff; in particular also remove some references to deprecated JobConf where we can at the moment
>
> Modified:
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/IntPairWritable.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAInference.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAState.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/Builder.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/CategoricalNode.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Leaf.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/MockLeaf.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Node.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/NumericalNode.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Frequencies.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
> lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
> lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapred/partial/PartitionBugTest.java
> lucene/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartitionBugTest.java
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java Tue Dec 15 17:39:18 2009
> @@ -34,7 +34,7 @@
>
> private final DataSource delegate;
>
> - public ConnectionPoolDataSource(final DataSource underlyingDataSource) {
> + public ConnectionPoolDataSource(DataSource underlyingDataSource) {
> if (underlyingDataSource == null) {
> throw new IllegalArgumentException("underlyingDataSource is null");
> }
> @@ -94,7 +94,7 @@
>
> private final DataSource underlyingDataSource;
>
> - public ConfiguringConnectionFactory(DataSource underlyingDataSource) {
> + ConfiguringConnectionFactory(DataSource underlyingDataSource) {
> this.underlyingDataSource = underlyingDataSource;
> }
>
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.classifier.bayes.mapreduce.bayes;
>
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileStatus;
> import org.apache.hadoop.fs.FileSystem;
> @@ -30,7 +31,7 @@
> import org.apache.hadoop.mapred.KeyValueTextInputFormat;
> import org.apache.hadoop.mapred.SequenceFileOutputFormat;
> import org.apache.mahout.classifier.ConfusionMatrix;
> -import org.apache.mahout.classifier.bayes.common.BayesParameters;
> +import org.apache.mahout.common.Parameters;
> import org.apache.mahout.common.StringTuple;
> import org.slf4j.Logger;
> import org.slf4j.LoggerFactory;
> @@ -51,8 +52,8 @@
> *
> * @param params The Job parameters containing the gramSize, input output folders, defaultCat, encoding
> */
> - public static void runJob(BayesParameters params) throws IOException {
> - JobClient client = new JobClient();
> + public static void runJob(Parameters params) throws IOException {
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(BayesClassifierDriver.class);
> conf.setJobName("Bayes Classifier Driver running over input: " + params.get("testDirPath"));
> conf.setOutputKeyClass(StringTuple.class);
> @@ -69,7 +70,8 @@
> conf.setOutputFormat(SequenceFileOutputFormat.class);
>
> conf.set("io.serializations",
> - "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
> + "org.apache.hadoop.io.serializer.JavaSerialization," +
> + "org.apache.hadoop.io.serializer.WritableSerialization");
>
> FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
> if (dfs.exists(outPath)) {
> @@ -85,7 +87,10 @@
> log.info("{}",matrix.summarize());
> }
>
> - private static ConfusionMatrix readResult(FileSystem fs, Path pathPattern, Configuration conf, BayesParameters params)
> + private static ConfusionMatrix readResult(FileSystem fs,
> + Path pathPattern,
> + Configuration conf,
> + Parameters params)
> throws IOException {
>
> StringTuple key = new StringTuple();
> @@ -101,8 +106,9 @@
> String correctLabel = key.stringAt(1);
> String classifiedLabel = key.stringAt(2);
> Map<String, Integer> rowMatrix = confusionMatrix.get(correctLabel);
> - if(rowMatrix == null)
> - rowMatrix = new HashMap<String, Integer>();
> + if (rowMatrix == null) {
> + rowMatrix = new HashMap<String, Integer>();
> + }
> Integer count = Double.valueOf(value.get()).intValue();
> rowMatrix.put(classifiedLabel, count);
> confusionMatrix.put(correctLabel, rowMatrix);
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.classifier.bayes.mapreduce.bayes;
>
> +import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.mapred.JobConf;
> @@ -46,7 +47,7 @@
> @Override
> public void runJob(String input, String output, BayesParameters params)
> throws IOException, InterruptedException, ClassNotFoundException {
> - JobConf conf = new JobConf(BayesDriver.class);
> + Configuration conf = new JobConf(BayesDriver.class);
> Path outPath = new Path(output);
> FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
> if (dfs.exists(outPath)) {
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.classifier.bayes.mapreduce.bayes;
>
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.DefaultStringifier;
> @@ -51,7 +52,7 @@
> */
> @Override
> public void runJob(String input, String output, BayesParameters params) throws IOException {
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(BayesThetaNormalizerDriver.class);
>
> conf.setJobName("Bayes Theta Normalizer Driver running over input: " + input);
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.classifier.bayes.mapreduce.cbayes;
>
> +import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.mapred.JobConf;
> @@ -45,7 +46,7 @@
> */
> @Override
> public void runJob(String input, String output, BayesParameters params) throws IOException, InterruptedException, ClassNotFoundException {
> - JobConf conf = new JobConf(CBayesDriver.class);
> + Configuration conf = new JobConf(CBayesDriver.class);
> Path outPath = new Path(output);
> FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
> if (dfs.exists(outPath)) {
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.classifier.bayes.mapreduce.cbayes;
>
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.DefaultStringifier;
> @@ -51,7 +52,7 @@
> */
> @Override
> public void runJob(String input, String output, BayesParameters params) throws IOException {
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(CBayesThetaNormalizerDriver.class);
> conf.setJobName("Complementary Bayes Theta Normalizer Driver running over input: " + input);
>
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.classifier.bayes.mapreduce.common;
>
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.DoubleWritable;
> @@ -41,7 +42,7 @@
> */
> @Override
> public void runJob(String input, String output, BayesParameters params) throws IOException {
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(BayesFeatureDriver.class);
> conf.setJobName("Bayes Feature Driver running over input: " + input);
> conf.setOutputKeyClass(StringTuple.class);
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java Tue Dec 15 17:39:18 2009
> @@ -128,7 +128,7 @@
> @Override
> public void configure(JobConf job) {
> try {
> - System.out.println("Bayes Parameter" + job.get("bayes.parameters"));
> + log.info("Bayes Parameter {}", job.get("bayes.parameters"));
> Parameters params = Parameters.fromString(job.get("bayes.parameters",""));
> gramSize = Integer.valueOf(params.get("gramSize"));
>
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.classifier.bayes.mapreduce.common;
>
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> @@ -56,7 +57,7 @@
> @Override
> public void runJob(String input, String output, BayesParameters params) throws IOException {
>
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(BayesWeightSummerDriver.class);
> conf.setJobName("TfIdf Driver running over input: " + input);
>
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.classifier.bayes.mapreduce.common;
>
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.DoubleWritable;
> @@ -41,7 +42,7 @@
> */
> @Override
> public void runJob(String input, String output, BayesParameters params) throws IOException {
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(BayesWeightSummerDriver.class);
> conf.setJobName("Bayes Weight Summer Driver running over input: " + input);
>
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java Tue Dec 15 17:39:18 2009
> @@ -39,9 +39,9 @@
> // the distance measure
> private DistanceMeasure measure;
>
> - private int nextClusterId = 0;
> + //private int nextClusterId = 0;
>
> - public CanopyClusterer(final DistanceMeasure measure, final double t1, final double t2) {
> + public CanopyClusterer(DistanceMeasure measure, double t1, double t2) {
> this.t1 = t1;
> this.t2 = t2;
> this.measure = measure;
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Tue Dec 15 17:39:18 2009
> @@ -25,6 +25,7 @@
> import org.apache.commons.cli2.builder.DefaultOptionBuilder;
> import org.apache.commons.cli2.builder.GroupBuilder;
> import org.apache.commons.cli2.commandline.Parser;
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.Text;
> @@ -133,7 +134,7 @@
> String measureClassName, double t1, double t2, Class<? extends Vector> vectorClass) throws IOException {
> log.info("Input: " + input + " Out: " + output + " Measure: " + measureClassName + " t1: " + t1
> + " t2: " + t2 + " Vector Class: " + vectorClass.getSimpleName());
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(CanopyDriver.class);
> conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
> conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(t1));
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java Tue Dec 15 17:39:18 2009
> @@ -25,6 +25,7 @@
> import org.apache.commons.cli2.builder.DefaultOptionBuilder;
> import org.apache.commons.cli2.builder.GroupBuilder;
> import org.apache.commons.cli2.commandline.Parser;
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.Text;
> @@ -139,7 +140,7 @@
> */
> public static void runJob(String points, String canopies, String output,
> String measureClassName, double t1, double t2, Class<? extends Vector> vectorClass) throws IOException {
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(ClusterDriver.class);
>
> conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Tue Dec 15 17:39:18 2009
> @@ -25,6 +25,7 @@
> import org.apache.commons.cli2.builder.DefaultOptionBuilder;
> import org.apache.commons.cli2.builder.GroupBuilder;
> import org.apache.commons.cli2.commandline.Parser;
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.SequenceFile;
> @@ -187,7 +188,7 @@
> public static void runIteration(String input, String stateIn,
> String stateOut, String modelFactory, int numClusters, double alpha_0,
> int numReducers) {
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(DirichletDriver.class);
>
> conf.setOutputKeyClass(Text.class);
> @@ -225,7 +226,7 @@
> * @param output the directory pathname for output points
> */
> public static void runClustering(String input, String stateIn, String output) {
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(DirichletDriver.class);
>
> conf.setOutputKeyClass(Text.class);
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java Tue Dec 15 17:39:18 2009
> @@ -25,6 +25,7 @@
> import org.apache.commons.cli2.builder.DefaultOptionBuilder;
> import org.apache.commons.cli2.builder.GroupBuilder;
> import org.apache.commons.cli2.commandline.Parser;
> +import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.mapred.JobConf;
> @@ -103,7 +104,7 @@
> throws IOException, ClassNotFoundException, InstantiationException,
> IllegalAccessException {
> // delete the output directory
> - JobConf conf = new JobConf(DirichletJob.class);
> + Configuration conf = new JobConf(DirichletJob.class);
> Path outPath = new Path(output);
> FileSystem fs = FileSystem.get(outPath.toUri(), conf);
> if (fs.exists(outPath)) {
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java Tue Dec 15 17:39:18 2009
> @@ -38,7 +38,7 @@
> * When do we define a cluster to have converged?
> *
> * */
> - public FuzzyKMeansClusterer(final DistanceMeasure measure, double convergenceDelta, double m) {
> + public FuzzyKMeansClusterer(DistanceMeasure measure, double convergenceDelta, double m) {
> this.measure = measure;
> this.convergenceDelta = convergenceDelta;
> this.m = m;
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Tue Dec 15 17:39:18 2009
> @@ -67,20 +67,20 @@
> * when the string is wrongly formatted
> */
> public static Cluster decodeCluster(String formattedString) {
> - final int beginIndex = formattedString.indexOf('{');
> + int beginIndex = formattedString.indexOf('{');
> if (beginIndex <= 0) {
> throw new IllegalArgumentException(ERROR_UNKNOWN_CLUSTER_FORMAT
> + formattedString);
> }
> - final String id = formattedString.substring(0, beginIndex);
> - final String center = formattedString.substring(beginIndex);
> - final char firstChar = id.charAt(0);
> - final boolean startsWithV = firstChar == 'V';
> - final Cluster cluster;
> + String id = formattedString.substring(0, beginIndex);
> + String center = formattedString.substring(beginIndex);
> + char firstChar = id.charAt(0);
> + boolean startsWithV = firstChar == 'V';
> + Cluster cluster;
> if (firstChar == 'C' || startsWithV) {
> - final int clusterId = Integer.parseInt(formattedString.substring(1,
> + int clusterId = Integer.parseInt(formattedString.substring(1,
> beginIndex - 2));
> - final Vector clusterCenter = AbstractVector.decodeVector(center);
> + Vector clusterCenter = AbstractVector.decodeVector(center);
> cluster = new Cluster(clusterCenter, clusterId);
> cluster.setConverged(startsWithV);
> } else {
> @@ -222,8 +222,8 @@
> * the convergence delta to use for stopping.
> * @return if the cluster is converged
> */
> - public boolean computeConvergence(final DistanceMeasure measure,
> - final double convergenceDelta) {
> + public boolean computeConvergence(DistanceMeasure measure,
> + double convergenceDelta) {
> Vector centroid = computeCentroid();
> converged = measure.distance(centroid.getLengthSquared(), centroid,
> getCenter()) <= convergenceDelta;
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/IntPairWritable.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/IntPairWritable.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/IntPairWritable.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/IntPairWritable.java Tue Dec 15 17:39:18 2009
> @@ -20,6 +20,7 @@
> import java.io.DataInput;
> import java.io.DataOutput;
> import java.io.IOException;
> +import java.io.Serializable;
>
> import org.apache.hadoop.io.WritableComparable;
> import org.apache.hadoop.io.WritableComparator;
> @@ -106,7 +107,7 @@
> WritableComparator.define(IntPairWritable.class, new Comparator());
> }
>
> - public static class Comparator extends WritableComparator {
> + public static class Comparator extends WritableComparator implements Serializable {
> public Comparator() {
> super(IntPairWritable.class);
> }
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAInference.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAInference.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAInference.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAInference.java Tue Dec 15 17:39:18 2009
> @@ -36,6 +36,7 @@
> public class LDAInference {
>
> private static final double E_STEP_CONVERGENCE = 1.0E-6;
> + private static final int MAX_ITER = 20;
>
> public LDAInference(LDAState state) {
> this.state = state;
> @@ -102,7 +103,6 @@
> Map<Integer, Integer> columnMap = new HashMap<Integer, Integer>();
>
> int iteration = 0;
> - final int MAX_ITER = 20;
>
> boolean converged = false;
> double oldLL = 1;
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAState.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAState.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAState.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAState.java Tue Dec 15 17:39:18 2009
> @@ -37,7 +37,7 @@
> }
>
> public double logProbWordGivenTopic(int word, int topic) {
> - final double logProb = topicWordProbabilities.getQuick(topic, word);
> + double logProb = topicWordProbabilities.getQuick(topic, word);
> return logProb == Double.NEGATIVE_INFINITY ? -100.0
> : logProb - logTotals[topic];
> }
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java Tue Dec 15 17:39:18 2009
> @@ -3,10 +3,10 @@
> public interface MeanShiftCanopyConfigKeys {
>
> // keys used by Driver, Mapper, Combiner & Reducer
> - public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
> - public static final String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
> - public static final String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
> - public static final String CONTROL_PATH_KEY = "org.apache.mahout.clustering.control.path";
> - public static final String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.canopy.convergence";
> + String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
> + String T1_KEY = "org.apache.mahout.clustering.canopy.t1";
> + String T2_KEY = "org.apache.mahout.clustering.canopy.t2";
> + String CONTROL_PATH_KEY = "org.apache.mahout.clustering.control.path";
> + String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.canopy.convergence";
>
> }
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Tue Dec 15 17:39:18 2009
> @@ -25,6 +25,7 @@
> import org.apache.commons.cli2.builder.DefaultOptionBuilder;
> import org.apache.commons.cli2.builder.GroupBuilder;
> import org.apache.commons.cli2.commandline.Parser;
> +import org.apache.hadoop.conf.Configurable;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.FileInputFormat;
> @@ -103,7 +104,7 @@
> *
> * @param input the input pathname String
> * @param output the output pathname String
> - * @param control TODO
> + * @param control the control path
> * @param measureClassName the DistanceMeasure class name
> * @param t1 the T1 distance threshold
> * @param t2 the T2 distance threshold
> @@ -112,7 +113,7 @@
> public static void runJob(String input, String output, String control,
> String measureClassName, double t1, double t2, double convergenceDelta) {
>
> - JobClient client = new JobClient();
> + Configurable client = new JobClient();
> JobConf conf = new JobConf(MeanShiftCanopyDriver.class);
>
> conf.setOutputKeyClass(Text.class);
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java Tue Dec 15 17:39:18 2009
> @@ -25,6 +25,7 @@
> import org.apache.commons.cli2.builder.DefaultOptionBuilder;
> import org.apache.commons.cli2.builder.GroupBuilder;
> import org.apache.commons.cli2.commandline.Parser;
> +import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.mapred.JobConf;
> @@ -112,7 +113,7 @@
> String measureClassName, double t1, double t2, double convergenceDelta,
> int maxIterations) throws IOException {
> // delete the output directory
> - JobConf conf = new JobConf(MeanShiftCanopyDriver.class);
> + Configuration conf = new JobConf(MeanShiftCanopyDriver.class);
> Path outPath = new Path(output);
> FileSystem fs = FileSystem.get(outPath.toUri(), conf);
> if (fs.exists(outPath)) {
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.common;
>
> +import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.mapred.JobConf;
> @@ -35,7 +36,7 @@
> }
>
> public static void overwriteOutput(String output) throws IOException {
> - JobConf conf = new JobConf(KMeansDriver.class);
> + Configuration conf = new JobConf(KMeansDriver.class);
> Path outPath = new Path(output);
> FileSystem fs = FileSystem.get(outPath.toUri(), conf);
> if (fs.exists(outPath)) {
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java Tue Dec 15 17:39:18 2009
> @@ -31,7 +31,7 @@
>
> private PriorityQueue<K> queue = null;
>
> - public LeastKCache(final int capacity) {
> + public LeastKCache(int capacity) {
>
> this.capacity = capacity;
>
> @@ -48,17 +48,17 @@
> }
>
> @Override
> - final public long capacity() {
> + public final long capacity() {
> return capacity;
> }
>
> @Override
> - final public V get(K key) {
> + public final V get(K key) {
> return cache.get(key);
> }
>
> @Override
> - final public void set(K key, V value) {
> + public final void set(K key, V value) {
> if (contains(key) == false) {
> queue.add(key);
> }
> @@ -70,12 +70,12 @@
> }
>
> @Override
> - final public long size() {
> + public final long size() {
> return cache.size();
> }
>
> @Override
> - final public boolean contains(K key) {
> + public final boolean contains(K key) {
> return (cache.containsKey(key));
> }
>
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/AbstractParameter.java Tue Dec 15 17:39:18 2009
> @@ -17,6 +17,7 @@
>
> package org.apache.mahout.common.parameters;
>
> +import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.mapred.JobConf;
>
> import java.util.Collection;
> @@ -53,7 +54,7 @@
> return Collections.emptyList();
> }
>
> - protected AbstractParameter(Class<T> type, String prefix, String name, JobConf jobConf, T defaultValue, String description) {
> + protected AbstractParameter(Class<T> type, String prefix, String name, Configuration jobConf, T defaultValue, String description) {
> this.type = type;
> this.name = name;
> this.description = description;
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/Builder.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/Builder.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/Builder.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapred/Builder.java Tue Dec 15 17:39:18 2009
> @@ -206,7 +206,7 @@
> * @return
> * @throws IOException
> */
> - public static Path getDistributedCacheFile(JobConf job, int index)
> + public static Path getDistributedCacheFile(Configuration job, int index)
> throws IOException {
> URI[] files = DistributedCache.getCacheFiles(job);
>
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java Tue Dec 15 17:39:18 2009
> @@ -25,6 +25,7 @@
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.SequenceFile.Reader;
> import org.apache.hadoop.mapreduce.Job;
> +import org.apache.hadoop.mapreduce.JobContext;
> import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
> import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
> import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
> @@ -147,7 +148,7 @@
> * @param callback can be null
> * @throws IOException
> */
> - protected static void processOutput(Job job, Path outputPath,
> + protected static void processOutput(JobContext job, Path outputPath,
> int[] firstIds, TreeID[] keys, Node[] trees, PredictionCallback callback)
> throws IOException {
> if ((keys != null && trees == null)||(keys == null && trees != null)) {
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java Tue Dec 15 17:39:18 2009
> @@ -36,6 +36,7 @@
> import org.apache.hadoop.io.Writable;
> import org.apache.hadoop.io.SequenceFile.Reader;
> import org.apache.hadoop.mapreduce.Job;
> +import org.apache.hadoop.mapreduce.JobContext;
> import org.apache.hadoop.mapreduce.Mapper;
> import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
> import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
> @@ -117,7 +118,7 @@
> * @return info for each partition in Hadoop's order
> * @throws IOException
> */
> - protected Step0Output[] parseOutput(Job job) throws IOException {
> + protected Step0Output[] parseOutput(JobContext job) throws IOException {
> Configuration conf = job.getConfiguration();
>
> log.info("mapred.map.tasks = " + conf.getInt("mapred.map.tasks", -1));
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/CategoricalNode.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/CategoricalNode.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/CategoricalNode.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/CategoricalNode.java Tue Dec 15 17:39:18 2009
> @@ -77,6 +77,11 @@
> }
>
> @Override
> + protected Type getType() {
> + return Type.CATEGORICAL;
> + }
> +
> + @Override
> public boolean equals(Object obj) {
> if (this == obj)
> return true;
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Leaf.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Leaf.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Leaf.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Leaf.java Tue Dec 15 17:39:18 2009
> @@ -65,6 +65,11 @@
> }
>
> @Override
> + protected Type getType() {
> + return Type.LEAF;
> + }
> +
> + @Override
> public boolean equals(Object obj) {
> if (this == obj)
> return true;
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/MockLeaf.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/MockLeaf.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/MockLeaf.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/MockLeaf.java Tue Dec 15 17:39:18 2009
> @@ -31,6 +31,11 @@
> }
>
> @Override
> + protected Type getType() {
> + return Type.MOCKLEAF;
> + }
> +
> + @Override
> protected String getString() {
> return "[MockLeaf]";
> }
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Node.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Node.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Node.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Node.java Tue Dec 15 17:39:18 2009
> @@ -29,7 +29,7 @@
> */
> public abstract class Node implements Writable {
>
> - protected enum NODE_TYPE {
> + protected enum Type {
> MOCKLEAF, LEAF, NUMERICAL, CATEGORICAL
> }
>
> @@ -55,28 +55,10 @@
> */
> public abstract long maxDepth();
>
> - /**
> - * converts the node implementation into an int code
> - *
> - * @return
> - */
> - private int node2Type() {
> - if (this instanceof MockLeaf) {
> - return NODE_TYPE.MOCKLEAF.ordinal();
> - } else if (this instanceof Leaf) {
> - return NODE_TYPE.LEAF.ordinal();
> - } else if (this instanceof NumericalNode) {
> - return NODE_TYPE.NUMERICAL.ordinal();
> - } else if (this instanceof CategoricalNode) {
> - return NODE_TYPE.CATEGORICAL.ordinal();
> - } else {
> - throw new IllegalStateException(
> - "This implementation is not currently supported");
> - }
> - }
> + protected abstract Type getType();
>
> public static Node read(DataInput in) throws IOException {
> - NODE_TYPE type = NODE_TYPE.values()[in.readInt()];
> + Type type = Type.values()[in.readInt()];
> Node node;
>
> switch (type) {
> @@ -104,14 +86,14 @@
>
> @Override
> public final String toString() {
> - return node2Type() + ":" + getString() + ';';
> + return getType() + ":" + getString() + ';';
> }
>
> protected abstract String getString();
>
> @Override
> public final void write(DataOutput out) throws IOException {
> - out.writeInt(node2Type());
> + out.writeInt(getType().ordinal());
> writeNode(out);
> }
>
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/NumericalNode.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/NumericalNode.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/NumericalNode.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/NumericalNode.java Tue Dec 15 17:39:18 2009
> @@ -68,6 +68,11 @@
> }
>
> @Override
> + protected Type getType() {
> + return Type.NUMERICAL;
> + }
> +
> + @Override
> public boolean equals(Object obj) {
> if (this == obj)
> return true;
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Frequencies.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Frequencies.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Frequencies.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/Frequencies.java Tue Dec 15 17:39:18 2009
> @@ -32,7 +32,6 @@
> import org.apache.hadoop.util.Tool;
> import org.apache.hadoop.util.ToolRunner;
> import org.apache.mahout.common.CommandLineUtil;
> -import org.apache.mahout.df.tools.FrequenciesJob;
> import org.apache.mahout.df.data.DataUtils;
> import org.slf4j.Logger;
> import org.slf4j.LoggerFactory;
> @@ -91,7 +90,7 @@
>
> runTool(dataPath, datasetPath);
> } catch (OptionException e) {
> - System.err.println("Exception : " + e);
> + log.warn(e.toString(), e);
> CommandLineUtil.printHelp(group);
> }
>
> @@ -114,7 +113,7 @@
>
> // compute the partitions' sizes
> int numPartitions = counts.length;
> - int[] sizes = new int[numPartitions];
> + int[] sizes = new int[numPartitions]; // TODO this isn't used?
> for (int p = 0; p < numPartitions; p++) {
> sizes[p] = DataUtils.sum(counts[p]);
> }
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java Tue Dec 15 17:39:18 2009
> @@ -27,6 +27,7 @@
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.io.Writable;
> import org.apache.hadoop.mapreduce.Job;
> +import org.apache.hadoop.mapreduce.JobContext;
> import org.apache.hadoop.mapreduce.Mapper;
> import org.apache.hadoop.mapreduce.Reducer;
> import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
> @@ -125,7 +126,7 @@
> * @return counts[partition][label] = num tuples from 'partition' with class == label
> * @throws java.io.IOException
> */
> - protected int[][] parseOutput(Job job) throws IOException {
> + protected int[][] parseOutput(JobContext job) throws IOException {
> Configuration conf = job.getConfiguration();
>
> int numMaps = conf.getInt("mapred.map.tasks", -1);
> @@ -223,8 +224,7 @@
> }
>
> @Override
> - protected void reduce(LongWritable key, Iterable<IntWritable> values, Reducer<LongWritable,
> - IntWritable, LongWritable, Frequencies>.Context context) throws IOException, InterruptedException {
> + protected void reduce(LongWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
> int[] counts = new int[nblabels];
> for (IntWritable value : values){
> counts[value.get()]++;
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java Tue Dec 15 17:39:18 2009
> @@ -26,13 +26,16 @@
> import java.util.Scanner;
>
> /**
> - * This tool is used to uniformely distribute the class of all the tuples of the dataset over a given number of
> + * This tool is used to uniformly distribute the class of all the tuples of the dataset over a given number of
> * partitions.
> */
> public class UDistrib {
>
> private static final Logger log = LoggerFactory.getLogger(UDistrib.class);
>
> + private UDistrib() {
> + }
> +
> /**
> * Launch the uniform distribution tool. Requires the following command line arguments:<br>
> *
> @@ -90,7 +93,7 @@
>
> runTool(data, dataset, output, numPartitions);
> } catch (OptionException e) {
> - System.err.println("Exception : " + e);
> + log.warn(e.toString(), e);
> CommandLineUtil.printHelp(group);
> }
>
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Tue Dec 15 17:39:18 2009
> @@ -17,18 +17,6 @@
>
> package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
>
> -import java.io.IOException;
> -import java.util.Arrays;
> -import java.util.Collections;
> -import java.util.Comparator;
> -import java.util.HashMap;
> -import java.util.HashSet;
> -import java.util.Iterator;
> -import java.util.Map;
> -import java.util.List;
> -import java.util.ArrayList;
> -import java.util.Set;
> -import java.util.Map.Entry;
> import org.apache.commons.lang.mutable.MutableLong;
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileSystem;
> @@ -43,11 +31,20 @@
> import org.slf4j.Logger;
> import org.slf4j.LoggerFactory;
>
> -/**
> - * PFPGrowth Class has both vanilla FPGrowth and Top K FPGrowth
> - *
> - * @param <A>
> - */
> +import java.io.IOException;
> +import java.util.ArrayList;
> +import java.util.Arrays;
> +import java.util.Collections;
> +import java.util.Comparator;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.Iterator;
> +import java.util.List;
> +import java.util.Map;
> +import java.util.Map.Entry;
> +import java.util.Set;
> +
> +/** PFPGrowth Class has both vanilla FPGrowth and Top K FPGrowth */
> public class FPGrowth<A extends Comparable<? super A>> {
>
> private static final Logger log = LoggerFactory.getLogger(FPGrowth.class);
> @@ -62,19 +59,17 @@
> // key is feature value is count
> while (reader.next(key, value)) {
> ret.add(new Pair<String, TopKStringPatterns>(key.toString(),
> - new TopKStringPatterns(value.getPatterns())));
> + new TopKStringPatterns(value.getPatterns())));
> }
> return ret;
> }
>
> /**
> - * Generate the Feature Frequency list from the given transaction whose
> - * frequency > minSupport
> - *
> + * Generate the Feature Frequency list from the given transaction whose frequency > minSupport
> + *
> * @param transactions Iterator over the transaction database
> - * @param minSupport minSupport of the feature to be included
> + * @param minSupport minSupport of the feature to be included
> * @return the List of features and their associated frequency as a Pair
> - * @throws IOException
> */
> public final List<Pair<A, Long>> generateFList(
> Iterator<List<A>> transactions, int minSupport) {
> @@ -84,25 +79,28 @@
> while (transactions.hasNext()) {
> List<A> transaction = transactions.next();
> for (A attribute : transaction) {
> - if (AttributeSupport.containsKey(attribute) == false)
> + if (AttributeSupport.containsKey(attribute) == false) {
> AttributeSupport.put(attribute, new MutableLong(1));
> - else
> + } else {
> AttributeSupport.get(attribute).increment();
> + }
> //count++;
> }
> }
> List<Pair<A, Long>> fList = new ArrayList<Pair<A, Long>>();
> - for (Entry<A, MutableLong> e : AttributeSupport.entrySet())
> + for (Entry<A, MutableLong> e : AttributeSupport.entrySet()) {
> fList.add(new Pair<A, Long>(e.getKey(), e.getValue()
> .longValue()));
> + }
> Collections.sort(fList, new Comparator<Pair<A, Long>>() {
>
> @Override
> public int compare(Pair<A, Long> o1,
> - Pair<A, Long> o2) {
> + Pair<A, Long> o2) {
> int ret = o2.getSecond().compareTo(o1.getSecond());
> - if (ret != 0)
> + if (ret != 0) {
> return ret;
> + }
> return o1.getFirst().compareTo(o2.getFirst());
> }
>
> @@ -112,19 +110,16 @@
> }
>
> /**
> - * Generate Top K Frequent Patterns for every feature in returnableFeatures
> - * given a stream of transactions and the minimum support
> - *
> - * @param transactionStream Iterator of transaction
> - * @param frequencyList list of frequent features and their support value
> - * @param minSupport minimum support of the transactions
> - * @param K Number of top frequent patterns to keep
> - * @param returnableFeatures set of features for which the frequent patterns
> - * are mined. If the set is null, then top K patterns for every
> - * frequent item (an item whose support> minSupport) is generated
> - * @param output The output collector to which the the generated patterns are
> - * written
> - * @throws IOException
> + * Generate Top K Frequent Patterns for every feature in returnableFeatures given a stream of transactions and the
> + * minimum support
> + *
> + * @param transactionStream Iterator of transaction
> + * @param frequencyList list of frequent features and their support value
> + * @param minSupport minimum support of the transactions
> + * @param K Number of top frequent patterns to keep
> + * @param returnableFeatures set of features for which the frequent patterns are mined. If the set is null, then top K
> + * patterns for every frequent item (an item whose support> minSupport) is generated
> + * @param output The output collector to which the the generated patterns are written
> */
> public final void generateTopKFrequentPatterns(
> Iterator<List<A>> transactionStream,
> @@ -142,8 +137,9 @@
> for (Pair<A, Long> feature : frequencyList) {
> A attrib = feature.getFirst();
> Long frequency = feature.getSecond();
> - if (frequency < minSupport)
> + if (frequency < minSupport) {
> continue;
> + }
> attributeIdMapping.put(attrib, id);
> reverseMapping.put(id++, attrib);
> }
> @@ -152,8 +148,9 @@
> for (Pair<A, Long> feature : frequencyList) {
> A attrib = feature.getFirst();
> Long frequency = feature.getSecond();
> - if (frequency < minSupport)
> + if (frequency < minSupport) {
> break;
> + }
> attributeFrequency[attributeIdMapping.get(attrib)] = frequency;
> }
>
> @@ -169,34 +166,34 @@
> }
> }
> } else {
> - for (int j = 0; j < attributeIdMapping.size(); j++)
> + for (int j = 0; j < attributeIdMapping.size(); j++) {
> returnFeatures.add(j);
> + }
> }
>
> log.info("Number of unique pruned items {}", attributeIdMapping.size());
> generateTopKFrequentPatterns(new TransactionIterator<A>(
> transactionStream, attributeIdMapping), attributeFrequency, minSupport,
> - K, reverseMapping.size(), returnFeatures,
> - new TopKPatternsOutputConvertor<A>(output,
> - reverseMapping));
> + K, reverseMapping.size(), returnFeatures,
> + new TopKPatternsOutputConvertor<A>(output,
> + reverseMapping));
>
> }
>
> /**
> * Top K FpGrowth Algorithm
> - *
> - * @param tree to be mined
> + *
> + * @param tree to be mined
> * @param minSupportMutable minimum support of the pattern to keep
> - * @param K Number of top frequent patterns to keep
> - * @param requiredFeatures Set of integer id's of features to mine
> - * @param outputCollector the Collector class which converts the given
> - * frequent pattern in integer to A
> + * @param K Number of top frequent patterns to keep
> + * @param requiredFeatures Set of integer id's of features to mine
> + * @param outputCollector the Collector class which converts the given frequent pattern in integer to A
> * @return Top K Frequent Patterns for each feature and their support
> - * @throws IOException
> */
> private Map<Integer, FrequentPatternMaxHeap> fpGrowth(FPTree tree,
> - MutableLong minSupportMutable, int K, Set<Integer> requiredFeatures,
> - TopKPatternsOutputConvertor<A> outputCollector)
> + MutableLong minSupportMutable, int K,
> + Set<Integer> requiredFeatures,
> + TopKPatternsOutputConvertor<A> outputCollector)
> throws IOException {
>
> int minSupportValue = minSupportMutable.intValue();
> @@ -205,12 +202,13 @@
> FPTreeDepthCache treeCache = new FPTreeDepthCache();
> for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) {
> int attribute = tree.getAttributeAtIndex(i);
> - if (requiredFeatures.contains(attribute) == false)
> + if (requiredFeatures.contains(attribute) == false) {
> continue;
> + }
> log.info("Mining FTree Tree for all patterns with {}", attribute);
> MutableLong minSupport = new MutableLong(minSupportValue);
> FrequentPatternMaxHeap frequentPatterns = growth(tree, minSupport, K,
> - treeCache, 0, attribute);
> + treeCache, 0, attribute);
> Patterns.put(attribute, frequentPatterns);
> outputCollector.collect(attribute, frequentPatterns);
>
> @@ -219,12 +217,12 @@
> attribute).count(), Patterns.get(attribute).leastSupport());
> }
> log.info("Tree Cache: First Level: Cache hits={} Cache Misses={}",
> - treeCache.getHits(), treeCache.getMisses());
> + treeCache.getHits(), treeCache.getMisses());
> return Patterns;
> }
>
> private static FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree, int K,
> - MutableLong minSupportMutable) {
> + MutableLong minSupportMutable) {
> FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(K);
>
> int tempNode = FPTree.ROOTNODEID;
> @@ -232,7 +230,7 @@
> while (tree.childCount(tempNode) != 0) {
> if (tree.childCount(tempNode) > 1) {
> log.info("This should not happen {} {}", tree.childCount(tempNode),
> - tempNode);
> + tempNode);
> }
> tempNode = tree.childAtIndex(tempNode, 0);
> if (tree.count(tempNode) < minSupportMutable.intValue()) {
> @@ -248,23 +246,18 @@
> }
>
> /**
> - * Internal TopKFrequentPattern Generation algorithm, which represents the
> - * A's as integers and transforms features to use only
> - * integers
> - *
> - * @param transactions Transaction database Iterator
> - * @param attributeFrequency array representing the Frequency of the
> - * corresponding attribute id
> - * @param minSupport minimum support of the pattern to be mined
> - * @param K Max value of the Size of the Max-Heap in which Patterns are held
> - * @param featureSetSize number of features
> - * @param returnFeatures the id's of the features for which Top K patterns
> - * have to be mined
> - * @param topKPatternsOutputCollector the outputCollector which transforms the
> - * given Pattern in integer format to the corresponding
> - * A Format
> + * Internal TopKFrequentPattern Generation algorithm, which represents the A's as integers and transforms features to
> + * use only integers
> + *
> + * @param transactions Transaction database Iterator
> + * @param attributeFrequency array representing the Frequency of the corresponding attribute id
> + * @param minSupport minimum support of the pattern to be mined
> + * @param K Max value of the Size of the Max-Heap in which Patterns are held
> + * @param featureSetSize number of features
> + * @param returnFeatures the id's of the features for which Top K patterns have to be mined
> + * @param topKPatternsOutputCollector the outputCollector which transforms the given Pattern in integer format to the
> + * corresponding A Format
> * @return Top K frequent patterns for each attribute
> - * @throws IOException
> */
> private Map<Integer, FrequentPatternMaxHeap> generateTopKFrequentPatterns(
> Iterator<int[]> transactions,
> @@ -291,7 +284,7 @@
> Arrays.sort(transaction);
> //attribcount += transaction.length;
> nodecount += treeAddCount(tree, transaction, 1, minSupportMutable,
> - attributeFrequency);
> + attributeFrequency);
> i++;
> if (i % 10000 == 0) {
> log.info("FPTree Building: Read {} Transactions", i);
> @@ -301,47 +294,51 @@
> log.info("Number of Nodes in the FP Tree: {}", nodecount);
>
> return fpGrowth(tree, minSupportMutable, K, returnFeatures,
> - topKPatternsOutputCollector);
> + topKPatternsOutputCollector);
> }
>
> - private FrequentPatternMaxHeap growth(FPTree tree,
> - MutableLong minSupportMutable, int K, FPTreeDepthCache treeCache,
> - int level, int currentAttribute) {
> + private static FrequentPatternMaxHeap growth(FPTree tree,
> + MutableLong minSupportMutable,
> + int K,
> + FPTreeDepthCache treeCache,
> + int level,
> + int currentAttribute) {
> FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(K);
>
> int i = Arrays.binarySearch(tree.getHeaderTableAttributes(),
> - currentAttribute);
> - if (i < 0)
> + currentAttribute);
> + if (i < 0) {
> return frequentPatterns;
> + }
>
> - FrequentPatternMaxHeap returnedPatterns;
> int headerTableCount = tree.getHeaderTableCount();
> while (i < headerTableCount) {
> int attribute = tree.getAttributeAtIndex(i);
> long count = tree.getHeaderSupportCount(attribute);
> - if (count < minSupportMutable.intValue()){
> - i++;
> - continue;
> + if (count < minSupportMutable.intValue()) {
> + i++;
> + continue;
> }
>
> FPTree conditionalTree = treeCache.getFirstLevelTree(attribute);
> if (conditionalTree.isEmpty()) {
> traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
> - minSupportMutable, conditionalTree, tree);
> + minSupportMutable, conditionalTree, tree);
> }
>
> + FrequentPatternMaxHeap returnedPatterns;
> if (attribute == currentAttribute) {
>
> returnedPatterns = growthTopDown(conditionalTree, minSupportMutable, K,
> - treeCache, level + 1, true, currentAttribute);
> + treeCache, level + 1, true, currentAttribute);
> frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
> - attribute, count, true, true);
> + attribute, count, true, true);
> } else {
> returnedPatterns = growthTopDown(conditionalTree, minSupportMutable, K,
> - treeCache, level + 1, false, currentAttribute);
> + treeCache, level + 1, false, currentAttribute);
>
> frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
> - attribute, count, false, true);
> + attribute, count, false, true);
> }
> if (frequentPatterns.isFull()) {
> if (minSupportMutable.intValue() < frequentPatterns.leastSupport()) {
> @@ -355,56 +352,59 @@
> }
>
> private static FrequentPatternMaxHeap growthBottomUp(FPTree tree,
> - MutableLong minSupportMutable, int K, FPTreeDepthCache treeCache,
> - int level, boolean conditionalOfCurrentAttribute, int currentAttribute) {
> + MutableLong minSupportMutable, int K, FPTreeDepthCache treeCache,
> + int level, boolean conditionalOfCurrentAttribute,
> + int currentAttribute) {
> FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(K);
> if (conditionalOfCurrentAttribute == false) {
> int index = Arrays.binarySearch(tree.getHeaderTableAttributes(),
> - currentAttribute);
> - if (index < 0)
> + currentAttribute);
> + if (index < 0) {
> return frequentPatterns;
> - else {
> + } else {
> int attribute = tree.getAttributeAtIndex(index);
> long count = tree.getHeaderSupportCount(attribute);
> - if (count < minSupportMutable.longValue())
> + if (count < minSupportMutable.longValue()) {
> return frequentPatterns;
> + }
> }
> }
> if (tree.singlePath()) {
> return generateSinglePathPatterns(tree, K, minSupportMutable);
> }
> - FrequentPatternMaxHeap returnedPatterns;
> for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) {
> int attribute = tree.getAttributeAtIndex(i);
> long count = tree.getHeaderSupportCount(attribute);
> - if (count < minSupportMutable.longValue())
> + if (count < minSupportMutable.longValue()) {
> continue;
> + }
> FPTree conditionalTree = treeCache.getTree(level);
>
> + FrequentPatternMaxHeap returnedPatterns;
> if (conditionalOfCurrentAttribute) {
> traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
> - minSupportMutable, conditionalTree, tree);
> + minSupportMutable, conditionalTree, tree);
> returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
> - K, treeCache, level + 1, true, currentAttribute);
> + K, treeCache, level + 1, true, currentAttribute);
>
> frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
> - attribute, count, true, false);
> + attribute, count, true, false);
> } else {
> if (attribute == currentAttribute) {
> traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
> - minSupportMutable, conditionalTree, tree);
> + minSupportMutable, conditionalTree, tree);
> returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
> - K, treeCache, level + 1, true, currentAttribute);
> + K, treeCache, level + 1, true, currentAttribute);
>
> frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
> - attribute, count, true, false);
> + attribute, count, true, false);
> } else if (attribute > currentAttribute) {
> traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
> - minSupportMutable, conditionalTree, tree);
> + minSupportMutable, conditionalTree, tree);
> returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
> - K, treeCache, level + 1, false, currentAttribute);
> + K, treeCache, level + 1, false, currentAttribute);
> frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
> - attribute, count, false, false);
> + attribute, count, false, false);
> }
> }
>
> @@ -419,57 +419,60 @@
> }
>
> private static FrequentPatternMaxHeap growthTopDown(FPTree tree,
> - MutableLong minSupportMutable, int K, FPTreeDepthCache treeCache,
> - int level, boolean conditionalOfCurrentAttribute, int currentAttribute) {
> + MutableLong minSupportMutable, int K, FPTreeDepthCache treeCache,
> + int level, boolean conditionalOfCurrentAttribute,
> + int currentAttribute) {
> FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(K);
> if (conditionalOfCurrentAttribute == false) {
> int index = Arrays.binarySearch(tree.getHeaderTableAttributes(),
> - currentAttribute);
> - if (index < 0)
> + currentAttribute);
> + if (index < 0) {
> return frequentPatterns;
> - else {
> + } else {
> int attribute = tree.getAttributeAtIndex(index);
> long count = tree.getHeaderSupportCount(attribute);
> - if (count < minSupportMutable.intValue())
> + if (count < minSupportMutable.intValue()) {
> return frequentPatterns;
> + }
> }
> }
> if (tree.singlePath()) {
> return generateSinglePathPatterns(tree, K, minSupportMutable);
> }
> - FrequentPatternMaxHeap returnedPatterns;
> for (int i = 0; i < tree.getHeaderTableCount(); i++) {
> int attribute = tree.getAttributeAtIndex(i);
> long count = tree.getHeaderSupportCount(attribute);
> - if (count < minSupportMutable.longValue())
> + if (count < minSupportMutable.longValue()) {
> continue;
> + }
>
> FPTree conditionalTree = treeCache.getTree(level);
>
> + FrequentPatternMaxHeap returnedPatterns;
> if (conditionalOfCurrentAttribute) {
> traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
> - minSupportMutable, conditionalTree, tree);
> + minSupportMutable, conditionalTree, tree);
> returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
> - K, treeCache, level + 1, true, currentAttribute);
> + K, treeCache, level + 1, true, currentAttribute);
> frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
> - attribute, count, true, true);
> + attribute, count, true, true);
>
> } else {
> if (attribute == currentAttribute) {
> traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
> - minSupportMutable, conditionalTree, tree);
> + minSupportMutable, conditionalTree, tree);
> returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
> - K, treeCache, level + 1, true, currentAttribute);
> + K, treeCache, level + 1, true, currentAttribute);
> frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
> - attribute, count, true, false);
> + attribute, count, true, false);
>
> } else if (attribute > currentAttribute) {
> traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
> - minSupportMutable, conditionalTree, tree);
> + minSupportMutable, conditionalTree, tree);
> returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
> - K, treeCache, level + 1, false, currentAttribute);
> + K, treeCache, level + 1, false, currentAttribute);
> frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
> - attribute, count, false, true);
> + attribute, count, false, true);
>
> }
> }
> @@ -506,7 +509,8 @@
> }
>
> private static void traverseAndBuildConditionalFPTreeData(int firstConditionalNode,
> - MutableLong minSupportMutable, FPTree conditionalTree, FPTree tree) {
> + MutableLong minSupportMutable, FPTree conditionalTree,
> + FPTree tree) {
>
> // Build Subtable
> int conditionalNode = firstConditionalNode;
> @@ -564,25 +568,25 @@
> }
>
> /**
> - * Create FPTree with node counts incremented by addCount variable given the
> - * root node and the List of Attributes in transaction sorted by support
> - *
> - * @param tree object to which the transaction has to be added to
> - * @param myList List of transactions sorted by support
> - * @param addCount amount by which the Node count has to be incremented
> - * @param minSupport the MutableLong value which contains the current
> - * value(dynamic) of support
> + * Create FPTree with node counts incremented by addCount variable given the root node and the List of Attributes in
> + * transaction sorted by support
> + *
> + * @param tree object to which the transaction has to be added to
> + * @param myList List of transactions sorted by support
> + * @param addCount amount by which the Node count has to be incremented
> + * @param minSupport the MutableLong value which contains the current value(dynamic) of support
> * @param attributeFrequency the list of attributes and their frequency
> * @return the number of new nodes added
> */
> private static int treeAddCount(FPTree tree, int[] myList, int addCount,
> - MutableLong minSupport, long[] attributeFrequency) {
> + MutableLong minSupport, long[] attributeFrequency) {
> int temp = FPTree.ROOTNODEID;
> int ret = 0;
> boolean addCountMode = true;
> for (int attribute : myList) {
> - if (attributeFrequency[attribute] < minSupport.intValue())
> + if (attributeFrequency[attribute] < minSupport.intValue()) {
> return ret;
> + }
> int child;
> if (addCountMode) {
> child = tree.childWithAttribute(temp, attribute);
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java Tue Dec 15 17:39:18 2009
> @@ -74,8 +74,9 @@
> }
>
> private FPTree(int size, int headersize) {
> - if (size < DEFAULT_INITIAL_SIZE)
> + if (size < DEFAULT_INITIAL_SIZE) {
> size = DEFAULT_INITIAL_SIZE;
> + }
>
> parent = new int[size];
> next = new int[size];
> @@ -151,8 +152,9 @@
> public final int childWithAttribute(int nodeId, int childAttribute) {
> int length = childCount[nodeId];
> for (int i = 0; i < length; i++) {
> - if (attribute[nodeChildren[nodeId][i]] == childAttribute)
> + if (attribute[nodeChildren[nodeId][i]] == childAttribute) {
> return nodeChildren[nodeId][i];
> + }
> }
> return -1;
> }
> @@ -165,8 +167,9 @@
> }
>
> public final void clearConditional() {
> - for (int i = nodes - 1; i >= 0; i--)
> + for (int i = nodes - 1; i >= 0; i--) {
> conditional[i] = 0;
> + }
> }
>
> public final int conditional(int nodeId) {
> @@ -188,8 +191,9 @@
> this.attribute[nodes] = attribute;
> nodeCount[nodes] = count;
>
> - if (nodeChildren[nodes] == null)
> + if (nodeChildren[nodes] == null) {
> nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
> + }
>
> int childNodeId = nodes++;
> return childNodeId;
> @@ -207,8 +211,9 @@
> nodeCount[nodes] = count;
>
> conditional[nodes] = 0;
> - if (nodeChildren[nodes] == null)
> + if (nodeChildren[nodes] == null) {
> nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
> + }
>
> int childNodeId = nodes++;
> addChild(parentNodeId, childNodeId);
> @@ -222,10 +227,10 @@
> parent[nodes] = 0;
> attribute[nodes] = -1;
> nodeCount[nodes] = 0;
> - if (nodeChildren[nodes] == null)
> + if (nodeChildren[nodes] == null) {
> nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
> - int childNodeId = nodes++;
> - return childNodeId;
> + }
> + return nodes++;
> }
>
> public final int getAttributeAtIndex(int index) {
> @@ -308,15 +313,18 @@
> }
>
> private int getHeaderIndex(int attribute) {
> - if (attribute >= headerTableLookup.length)
> + if (attribute >= headerTableLookup.length) {
> resizeHeaderLookup(attribute);
> + }
> int index = headerTableLookup[attribute];
> if (index == -1) { // if attribute didnt exist;
> - if (headerTableCount >= headerTableAttributes.length)
> + if (headerTableCount >= headerTableAttributes.length) {
> resizeHeaderTable();
> + }
> headerTableAttributes[headerTableCount] = attribute;
> - if (headerTableProperties[headerTableCount] == null)
> + if (headerTableProperties[headerTableCount] == null) {
> headerTableProperties[headerTableCount] = new int[HEADERTABLEBLOCKSIZE];
> + }
> headerTableAttributeCount[headerTableCount] = 0;
> headerTableProperties[headerTableCount][HT_NEXT] = -1;
> headerTableProperties[headerTableCount][HT_LAST] = -1;
> @@ -328,8 +336,9 @@
>
> private void resize() {
> int size = (int) (GROWTH_RATE * nodes);
> - if (size < DEFAULT_INITIAL_SIZE)
> + if (size < DEFAULT_INITIAL_SIZE) {
> size = DEFAULT_INITIAL_SIZE;
> + }
>
> int[] oldChildCount = childCount;
> int[] oldAttribute = attribute;
> @@ -360,8 +369,9 @@
> private void resizeChildren(int nodeId) {
> int length = childCount[nodeId];
> int size = (int) (GROWTH_RATE * (length));
> - if (size < DEFAULT_CHILDREN_INITIAL_SIZE)
> + if (size < DEFAULT_CHILDREN_INITIAL_SIZE) {
> size = DEFAULT_CHILDREN_INITIAL_SIZE;
> + }
> int[] oldNodeChildren = nodeChildren[nodeId];
> nodeChildren[nodeId] = new int[size];
> System.arraycopy(oldNodeChildren, 0, this.nodeChildren[nodeId], 0, length);
> @@ -377,8 +387,9 @@
>
> private void resizeHeaderTable() {
> int size = (int) (GROWTH_RATE * (headerTableCount));
> - if (size < DEFAULT_HEADER_TABLE_INITIAL_SIZE)
> + if (size < DEFAULT_HEADER_TABLE_INITIAL_SIZE) {
> size = DEFAULT_HEADER_TABLE_INITIAL_SIZE;
> + }
>
> int[] oldAttributes = headerTableAttributes;
> long[] oldAttributeCount = headerTableAttributeCount;
> @@ -387,10 +398,10 @@
> headerTableAttributeCount = new long[size];
> headerTableProperties = new int[size][];
> System.arraycopy(oldAttributes, 0, this.headerTableAttributes, 0,
> - headerTableCount);
> + headerTableCount);
> System.arraycopy(oldAttributeCount, 0, this.headerTableAttributeCount, 0,
> - headerTableCount);
> + headerTableCount);
> System.arraycopy(oldProperties, 0, this.headerTableProperties, 0,
> - headerTableCount);
> + headerTableCount);
> }
> }
>
> Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java
> URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java?rev=890899&r1=890898&r2=890899&view=diff
> ==============================================================================
> --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java (original)
> +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java Tue Dec 15 17:39:18 2009
> @@ -17,10 +17,10 @@
>
> package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
>
> -import java.util.ArrayList;
> -
> import org.apache.mahout.common.cache.LeastKCache;
>
> +import java.util.ArrayList;
> +
> public class FPTreeDepthCache {
>
> private static int firstLevelCacheSize = 5;
>
>
>
Re: svn commit: r890899 [1/2] - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/
main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/ main/
Posted by Sean Owen <sr...@gmail.com>.
Alrighty, no problem -- you can roll it back. Do consider the edits I made
when you apply your patch, though -- I see lots of formatting and style
differences from the rest of the code.
On Tue, Dec 15, 2009 at 6:12 PM, Robin Anil <ro...@gmail.com> wrote:
> Please don't format the pfpgrowth package. It breaks all the new changes
> and code implementations, especially the fp-bonsai. I have currently
> rejected the changes, because it had become unmergeable. I will put up a
> new diff soon.
>
>