You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/05 12:00:26 UTC

svn commit: r906896 [2/2] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/ core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/integer/ core/src/main...

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/TopKStringPatterns.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/TopKStringPatterns.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/TopKStringPatterns.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/string/TopKStringPatterns.java Fri Feb  5 11:00:25 2010
@@ -28,32 +28,36 @@
 import org.apache.hadoop.io.Writable;
 import org.apache.mahout.common.Pair;
 
+/**
+ * A class which collects Top K string patterns
+ *
+ */
 public final class TopKStringPatterns implements Writable {
-  private List<Pair<List<String>, Long>> frequentPatterns = null;
-
+  private List<Pair<List<String>,Long>> frequentPatterns;
+  
   public TopKStringPatterns() {
-    frequentPatterns = new ArrayList<Pair<List<String>, Long>>();
+    frequentPatterns = new ArrayList<Pair<List<String>,Long>>();
   }
-
-  public TopKStringPatterns(List<Pair<List<String>, Long>> patterns) {
-    frequentPatterns = new ArrayList<Pair<List<String>, Long>>();
+  
+  public TopKStringPatterns(List<Pair<List<String>,Long>> patterns) {
+    frequentPatterns = new ArrayList<Pair<List<String>,Long>>();
     frequentPatterns.addAll(patterns);
   }
-
-  public Iterator<Pair<List<String>, Long>> iterator() {
+  
+  public Iterator<Pair<List<String>,Long>> iterator() {
     return frequentPatterns.iterator();
   }
-
-  public List<Pair<List<String>, Long>> getPatterns() {
+  
+  public List<Pair<List<String>,Long>> getPatterns() {
     return frequentPatterns;
   }
-
+  
   public TopKStringPatterns merge(TopKStringPatterns pattern, int heapSize) {
-    List<Pair<List<String>, Long>> patterns = new ArrayList<Pair<List<String>, Long>>();
-    Iterator<Pair<List<String>, Long>> myIterator = frequentPatterns.iterator();
-    Iterator<Pair<List<String>, Long>> otherIterator = pattern.iterator();
-    Pair<List<String>, Long> myItem = null;
-    Pair<List<String>, Long> otherItem = null;
+    List<Pair<List<String>,Long>> patterns = new ArrayList<Pair<List<String>,Long>>();
+    Iterator<Pair<List<String>,Long>> myIterator = frequentPatterns.iterator();
+    Iterator<Pair<List<String>,Long>> otherIterator = pattern.iterator();
+    Pair<List<String>,Long> myItem = null;
+    Pair<List<String>,Long> otherItem = null;
     for (int i = 0; i < heapSize; i++) {
       if (myItem == null && myIterator.hasNext()) {
         myItem = myIterator.next();
@@ -68,7 +72,7 @@
           if (cmp == 0) {
             for (int j = 0; j < myItem.getFirst().size(); j++) {
               cmp = myItem.getFirst().get(j).compareTo(
-                  otherItem.getFirst().get(j));
+                otherItem.getFirst().get(j));
               if (cmp != 0) {
                 break;
               }
@@ -97,7 +101,7 @@
     }
     return new TopKStringPatterns(patterns);
   }
-
+  
   @Override
   public void readFields(DataInput in) throws IOException {
     frequentPatterns.clear();
@@ -112,14 +116,14 @@
         in.readFully(data);
         items.add(Bytes.toString(data));
       }
-      frequentPatterns.add(new Pair<List<String>, Long>(items, support));
+      frequentPatterns.add(new Pair<List<String>,Long>(items, support));
     }
   }
-
+  
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeInt(frequentPatterns.size());
-    for (Pair<List<String>, Long> pattern : frequentPatterns) {
+    for (Pair<List<String>,Long> pattern : frequentPatterns) {
       out.writeInt(pattern.getFirst().size());
       out.writeLong(pattern.getSecond());
       for (String item : pattern.getFirst()) {
@@ -129,18 +133,18 @@
       }
     }
   }
-
+  
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
     String sep = "";
-    for (Pair<List<String>, Long> pattern : frequentPatterns) {
+    for (Pair<List<String>,Long> pattern : frequentPatterns) {
       sb.append(sep);
       sb.append(pattern.toString());
       sep = ", ";
-
+      
     }
     return sb.toString();
-
+    
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Fri Feb  5 11:00:25 2010
@@ -27,8 +27,8 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.Map.Entry;
+import java.util.Set;
 
 import org.apache.commons.lang.mutable.MutableLong;
 import org.apache.hadoop.conf.Configuration;
@@ -40,97 +40,121 @@
 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater;
-import org.apache.mahout.fpm.pfpgrowth.convertors.TopKPatternsOutputConvertor;
+import org.apache.mahout.fpm.pfpgrowth.convertors.TopKPatternsOutputConverter;
 import org.apache.mahout.fpm.pfpgrowth.convertors.TransactionIterator;
 import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-/** PFPGrowth Class has both vanilla FPGrowth and Top K FPGrowth */
+/**
+ * Implementation of PFGrowth Algorithm with FP-Bonsai pruning
+ * 
+ * @param A
+ *          The object type used as the cell items in a transaction list
+ */
 public class FPGrowth<A extends Comparable<? super A>> {
-
+  
   private static final Logger log = LoggerFactory.getLogger(FPGrowth.class);
-
-  public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(FileSystem fs,
-      Configuration conf, Path path) throws IOException {
-
-    List<Pair<String, TopKStringPatterns>> ret = new ArrayList<Pair<String, TopKStringPatterns>>();
+  
+  public static List<Pair<String,TopKStringPatterns>> readFrequentPattern(FileSystem fs,
+                                                                          Configuration conf,
+                                                                          Path path) throws IOException {
+    
+    List<Pair<String,TopKStringPatterns>> ret = new ArrayList<Pair<String,TopKStringPatterns>>();
     Text key = new Text();
     TopKStringPatterns value = new TopKStringPatterns();
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     // key is feature value is count
     while (reader.next(key, value)) {
-      ret.add(new Pair<String, TopKStringPatterns>(key.toString(), new TopKStringPatterns(value
-          .getPatterns())));
+      ret.add(new Pair<String,TopKStringPatterns>(key.toString(),
+          new TopKStringPatterns(value.getPatterns())));
     }
     return ret;
   }
-
+  
   /**
-   * Generate the Feature Frequency list from the given transaction whose frequency > minSupport
+   * Generate the Feature Frequency list from the given transaction whose
+   * frequency > minSupport
    * 
-   * @param transactions Iterator over the transaction database
-   * @param minSupport minSupport of the feature to be included
+   * @param transactions
+   *          Iterator over the transaction database
+   * @param minSupport
+   *          minSupport of the feature to be included
    * @return the List of features and their associated frequency as a Pair
    */
-  public final List<Pair<A, Long>> generateFList(Iterator<Pair<List<A>, Long>> transactions, int minSupport) {
-
-    Map<A, MutableLong> attributeSupport = new HashMap<A, MutableLong>();
+  public final List<Pair<A,Long>> generateFList(Iterator<Pair<List<A>,Long>> transactions,
+                                                int minSupport) {
+    
+    Map<A,MutableLong> attributeSupport = new HashMap<A,MutableLong>();
     // int count = 0;
     while (transactions.hasNext()) {
-      Pair<List<A>, Long> transaction = transactions.next();
+      Pair<List<A>,Long> transaction = transactions.next();
       for (A attribute : transaction.getFirst()) {
         if (attributeSupport.containsKey(attribute) == false) {
-          attributeSupport.put(attribute, new MutableLong(transaction.getSecond()));
+          attributeSupport.put(attribute, new MutableLong(transaction
+              .getSecond()));
         } else {
-          attributeSupport.get(attribute).add(transaction.getSecond().longValue());
+          attributeSupport.get(attribute).add(
+            transaction.getSecond().longValue());
           // count++;
         }
       }
     }
-    List<Pair<A, Long>> fList = new ArrayList<Pair<A, Long>>();
-    for (Entry<A, MutableLong> e : attributeSupport.entrySet()) {
-      fList.add(new Pair<A, Long>(e.getKey(), e.getValue().longValue()));
-    }
-
-    Collections.sort(fList, new Comparator<Pair<A, Long>>() {
-
+    List<Pair<A,Long>> fList = new ArrayList<Pair<A,Long>>();
+    for (Entry<A,MutableLong> e : attributeSupport.entrySet()) {
+      fList.add(new Pair<A,Long>(e.getKey(), e.getValue().longValue()));
+    }
+    
+    Collections.sort(fList, new Comparator<Pair<A,Long>>() {
+      
       @Override
-      public int compare(Pair<A, Long> o1, Pair<A, Long> o2) {
+      public int compare(Pair<A,Long> o1, Pair<A,Long> o2) {
         int ret = o2.getSecond().compareTo(o1.getSecond());
         if (ret != 0) {
           return ret;
         }
         return o1.getFirst().compareTo(o2.getFirst());
       }
-
+      
     });
-
+    
     return fList;
   }
-
+  
   /**
-   * Generate Top K Frequent Patterns for every feature in returnableFeatures given a stream of transactions
-   * and the minimum support
+   * Generate Top K Frequent Patterns for every feature in returnableFeatures
+   * given a stream of transactions and the minimum support
    * 
-   * @param transactionStream Iterator of transaction
-   * @param frequencyList list of frequent features and their support value
-   * @param minSupport minimum support of the transactions
-   * @param k Number of top frequent patterns to keep
-   * @param returnableFeatures set of features for which the frequent patterns are mined. If the set is null,
-   *        then top K patterns for every frequent item (an item whose support> minSupport) is generated
-   * @param output The output collector to which the the generated patterns are written
+   * @param transactionStream
+   *          Iterator of transaction
+   * @param frequencyList
+   *          list of frequent features and their support value
+   * @param minSupport
+   *          minimum support of the transactions
+   * @param k
+   *          Number of top frequent patterns to keep
+   * @param returnableFeatures
+   *          set of features for which the frequent patterns are mined. If the
+   *          set is null, then top K patterns for every frequent item (an item
+   *          whose support> minSupport) is generated
+   * @param output
+   *          The output collector to which the the generated patterns are
+   *          written
    * @throws IOException
    */
-  public final void generateTopKFrequentPatterns(Iterator<Pair<List<A>, Long>> transactionStream,
-      List<Pair<A, Long>> frequencyList, long minSupport, int k, Set<A> returnableFeatures,
-      OutputCollector<A, List<Pair<List<A>, Long>>> output, StatusUpdater updater) throws IOException {
-
-    Map<Integer, A> reverseMapping = new HashMap<Integer, A>();
-    Map<A, Integer> attributeIdMapping = new HashMap<A, Integer>();
-
+  public final void generateTopKFrequentPatterns(Iterator<Pair<List<A>,Long>> transactionStream,
+                                                 List<Pair<A,Long>> frequencyList,
+                                                 long minSupport,
+                                                 int k,
+                                                 Set<A> returnableFeatures,
+                                                 OutputCollector<A,List<Pair<List<A>,Long>>> output,
+                                                 StatusUpdater updater) throws IOException {
+    
+    Map<Integer,A> reverseMapping = new HashMap<Integer,A>();
+    Map<A,Integer> attributeIdMapping = new HashMap<A,Integer>();
+    
     int id = 0;
-    for (Pair<A, Long> feature : frequencyList) {
+    for (Pair<A,Long> feature : frequencyList) {
       A attrib = feature.getFirst();
       Long frequency = feature.getSecond();
       if (frequency < minSupport) {
@@ -139,9 +163,9 @@
       attributeIdMapping.put(attrib, id);
       reverseMapping.put(id++, attrib);
     }
-
+    
     long[] attributeFrequency = new long[attributeIdMapping.size()];
-    for (Pair<A, Long> feature : frequencyList) {
+    for (Pair<A,Long> feature : frequencyList) {
       A attrib = feature.getFirst();
       Long frequency = feature.getSecond();
       if (frequency < minSupport) {
@@ -149,15 +173,16 @@
       }
       attributeFrequency[attributeIdMapping.get(attrib)] = frequency;
     }
-
+    
     log.info("Number of unique items {}", frequencyList.size());
-
+    
     Set<Integer> returnFeatures = new HashSet<Integer>();
     if (returnableFeatures.isEmpty() == false) {
       for (A attrib : returnableFeatures) {
         if (attributeIdMapping.containsKey(attrib)) {
           returnFeatures.add(attributeIdMapping.get(attrib));
-          log.info("Adding Pattern {}=>{}", attrib, attributeIdMapping.get(attrib));
+          log.info("Adding Pattern {}=>{}", attrib, attributeIdMapping
+              .get(attrib));
         }
       }
     } else {
@@ -165,31 +190,41 @@
         returnFeatures.add(j);
       }
     }
-
+    
     log.info("Number of unique pruned items {}", attributeIdMapping.size());
-    generateTopKFrequentPatterns(new TransactionIterator<A>(transactionStream, attributeIdMapping),
-        attributeFrequency, minSupport, k, reverseMapping.size(), returnFeatures,
-        new TopKPatternsOutputConvertor<A>(output, reverseMapping), updater);
-
+    generateTopKFrequentPatterns(new TransactionIterator<A>(transactionStream,
+        attributeIdMapping), attributeFrequency, minSupport, k, reverseMapping
+        .size(), returnFeatures, new TopKPatternsOutputConverter<A>(output,
+        reverseMapping), updater);
+    
   }
-
+  
   /**
    * Top K FpGrowth Algorithm
    * 
-   * @param tree to be mined
-   * @param minSupportMutable minimum support of the pattern to keep
-   * @param k Number of top frequent patterns to keep
-   * @param requiredFeatures Set of integer id's of features to mine
-   * @param outputCollector the Collector class which converts the given frequent pattern in integer to A
+   * @param tree
+   *          to be mined
+   * @param minSupportMutable
+   *          minimum support of the pattern to keep
+   * @param k
+   *          Number of top frequent patterns to keep
+   * @param requiredFeatures
+   *          Set of integer id's of features to mine
+   * @param outputCollector
+   *          the Collector class which converts the given frequent pattern in
+   *          integer to A
    * @return Top K Frequent Patterns for each feature and their support
    */
-  private Map<Integer, FrequentPatternMaxHeap> fpGrowth(FPTree tree, MutableLong minSupportMutable, int k,
-      Set<Integer> requiredFeatures, TopKPatternsOutputConvertor<A> outputCollector, StatusUpdater updater)
-      throws IOException {
-
+  private Map<Integer,FrequentPatternMaxHeap> fpGrowth(FPTree tree,
+                                                       MutableLong minSupportMutable,
+                                                       int k,
+                                                       Set<Integer> requiredFeatures,
+                                                       TopKPatternsOutputConverter<A> outputCollector,
+                                                       StatusUpdater updater) throws IOException {
+    
     long minSupportValue = minSupportMutable.longValue();
-
-    Map<Integer, FrequentPatternMaxHeap> patterns = new HashMap<Integer, FrequentPatternMaxHeap>();
+    
+    Map<Integer,FrequentPatternMaxHeap> patterns = new HashMap<Integer,FrequentPatternMaxHeap>();
     FPTreeDepthCache treeCache = new FPTreeDepthCache();
     for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) {
       int attribute = tree.getAttributeAtIndex(i);
@@ -198,29 +233,32 @@
       }
       log.info("Mining FTree Tree for all patterns with {}", attribute);
       MutableLong minSupport = new MutableLong(minSupportValue);
-      FrequentPatternMaxHeap frequentPatterns = growth(tree, minSupport, k, treeCache, 0, attribute,
-          updater);
+      FrequentPatternMaxHeap frequentPatterns = growth(tree, minSupport, k,
+        treeCache, 0, attribute, updater);
       patterns.put(attribute, frequentPatterns);
       outputCollector.collect(attribute, frequentPatterns);
-
+      
       minSupportValue = Math.max(minSupportValue, minSupport.longValue() / 2);
-      log.info("Found {} Patterns with Least Support {}", patterns.get(attribute).count(), patterns.get(
-          attribute).leastSupport());
+      log.info("Found {} Patterns with Least Support {}", patterns.get(
+        attribute).count(), patterns.get(attribute).leastSupport());
     }
-    log.info("Tree Cache: First Level: Cache hits={} Cache Misses={}", treeCache.getHits(), treeCache
-        .getMisses());
+    log.info("Tree Cache: First Level: Cache hits={} Cache Misses={}",
+      treeCache.getHits(), treeCache.getMisses());
     return patterns;
   }
-
-  private static FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree, int k,
-      MutableLong minSupportMutable) {
-    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k, false);
-
+  
+  private static FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree,
+                                                                   int k,
+                                                                   MutableLong minSupportMutable) {
+    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k,
+        false);
+    
     int tempNode = FPTree.ROOTNODEID;
     Pattern frequentItem = new Pattern();
     while (tree.childCount(tempNode) != 0) {
       if (tree.childCount(tempNode) > 1) {
-        log.info("This should not happen {} {}", tree.childCount(tempNode), tempNode);
+        log.info("This should not happen {} {}", tree.childCount(tempNode),
+          tempNode);
       }
       tempNode = tree.childAtIndex(tempNode, 0);
       if (tree.count(tempNode) < minSupportMutable.intValue()) {
@@ -231,68 +269,87 @@
     if (frequentItem.length() > 0) {
       frequentPatterns.insert(frequentItem);
     }
-
+    
     return frequentPatterns;
   }
-
+  
   /**
-   * Internal TopKFrequentPattern Generation algorithm, which represents the A's as integers and transforms
-   * features to use only integers
+   * Internal TopKFrequentPattern Generation algorithm, which represents the A's
+   * as integers and transforms features to use only integers
    * 
-   * @param transactions Transaction database Iterator
-   * @param attributeFrequency array representing the Frequency of the corresponding attribute id
-   * @param minSupport minimum support of the pattern to be mined
-   * @param k Max value of the Size of the Max-Heap in which Patterns are held
-   * @param featureSetSize number of features
-   * @param returnFeatures the id's of the features for which Top K patterns have to be mined
-   * @param topKPatternsOutputCollector the outputCollector which transforms the given Pattern in integer
-   *        format to the corresponding A Format
+   * @param transactions
+   *          Transaction database Iterator
+   * @param attributeFrequency
+   *          array representing the Frequency of the corresponding attribute id
+   * @param minSupport
+   *          minimum support of the pattern to be mined
+   * @param k
+   *          Max value of the Size of the Max-Heap in which Patterns are held
+   * @param featureSetSize
+   *          number of features
+   * @param returnFeatures
+   *          the id's of the features for which Top K patterns have to be mined
+   * @param topKPatternsOutputCollector
+   *          the outputCollector which transforms the given Pattern in integer
+   *          format to the corresponding A Format
    * @return Top K frequent patterns for each attribute
    */
-  private Map<Integer, FrequentPatternMaxHeap> generateTopKFrequentPatterns(
-      Iterator<Pair<int[], Long>> transactions, long[] attributeFrequency, long minSupport, int k,
-      int featureSetSize, Set<Integer> returnFeatures,
-      TopKPatternsOutputConvertor<A> topKPatternsOutputCollector, StatusUpdater updater) throws IOException {
-
+  private Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns(Iterator<Pair<int[],Long>> transactions,
+                                                                           long[] attributeFrequency,
+                                                                           long minSupport,
+                                                                           int k,
+                                                                           int featureSetSize,
+                                                                           Set<Integer> returnFeatures,
+                                                                           TopKPatternsOutputConverter<A> topKPatternsOutputCollector,
+                                                                           StatusUpdater updater) throws IOException {
+    
     FPTree tree = new FPTree(featureSetSize);
     for (int i = 0; i < featureSetSize; i++) {
       tree.addHeaderCount(i, attributeFrequency[i]);
     }
-
+    
     // Constructing initial FPTree from the list of transactions
     MutableLong minSupportMutable = new MutableLong(minSupport);
     int nodecount = 0;
     // int attribcount = 0;
     int i = 0;
     while (transactions.hasNext()) {
-      Pair<int[], Long> transaction = transactions.next();
+      Pair<int[],Long> transaction = transactions.next();
       Arrays.sort(transaction.getFirst());
       // attribcount += transaction.length;
-      nodecount += treeAddCount(tree, transaction.getFirst(), transaction.getSecond(),
-          minSupportMutable, attributeFrequency);
+      nodecount += treeAddCount(tree, transaction.getFirst(), transaction
+          .getSecond(), minSupportMutable, attributeFrequency);
       i++;
       if (i % 10000 == 0) {
         log.info("FPTree Building: Read {} Transactions", i);
       }
     }
-
+    
     log.info("Number of Nodes in the FP Tree: {}", nodecount);
-
-    return fpGrowth(tree, minSupportMutable, k, returnFeatures, topKPatternsOutputCollector, updater);
-  }
-
-  private static FrequentPatternMaxHeap growth(FPTree tree, MutableLong minSupportMutable, int k,
-      FPTreeDepthCache treeCache, int level, int currentAttribute, StatusUpdater updater) {
-
-    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k, true);
-
-    int i = Arrays.binarySearch(tree.getHeaderTableAttributes(), currentAttribute);
+    
+    return fpGrowth(tree, minSupportMutable, k, returnFeatures,
+      topKPatternsOutputCollector, updater);
+  }
+  
+  private static FrequentPatternMaxHeap growth(FPTree tree,
+                                               MutableLong minSupportMutable,
+                                               int k,
+                                               FPTreeDepthCache treeCache,
+                                               int level,
+                                               int currentAttribute,
+                                               StatusUpdater updater) {
+    
+    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k,
+        true);
+    
+    int i = Arrays.binarySearch(tree.getHeaderTableAttributes(),
+      currentAttribute);
     if (i < 0) {
       return frequentPatterns;
     }
-
+    
     int headerTableCount = tree.getHeaderTableCount();
-
+    
     while (i < headerTableCount) {
       int attribute = tree.getAttributeAtIndex(i);
       long count = tree.getHeaderSupportCount(attribute);
@@ -303,23 +360,25 @@
       updater.update("FPGrowth Algorithm for a given feature: " + attribute);
       FPTree conditionalTree = treeCache.getFirstLevelTree(attribute);
       if (conditionalTree.isEmpty()) {
-        traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), minSupportMutable,
-            conditionalTree, tree);
+        traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
+          minSupportMutable, conditionalTree, tree);
         // printTree(conditionalTree);
-
+        
       }
-
+      
       FrequentPatternMaxHeap returnedPatterns;
       if (attribute == currentAttribute) {
-
-        returnedPatterns = growthTopDown(conditionalTree, minSupportMutable, k, treeCache, level + 1, true,
-            currentAttribute, updater);
-
-        frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, attribute, count, true, true);
+        
+        returnedPatterns = growthTopDown(conditionalTree, minSupportMutable, k,
+          treeCache, level + 1, true, currentAttribute, updater);
+        
+        frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
+          attribute, count, true, true);
       } else {
-        returnedPatterns = growthTopDown(conditionalTree, minSupportMutable, k, treeCache, level + 1,
-            false, currentAttribute, updater);
-        frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, attribute, count, false, true);
+        returnedPatterns = growthTopDown(conditionalTree, minSupportMutable, k,
+          treeCache, level + 1, false, currentAttribute, updater);
+        frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
+          attribute, count, false, true);
       }
       if (frequentPatterns.isFull()) {
         if (minSupportMutable.intValue() < frequentPatterns.leastSupport()) {
@@ -328,18 +387,25 @@
       }
       i++;
     }
-
+    
     return frequentPatterns;
   }
-
-  private static FrequentPatternMaxHeap growthBottomUp(FPTree tree, MutableLong minSupportMutable, int k,
-      FPTreeDepthCache treeCache, int level, boolean conditionalOfCurrentAttribute, int currentAttribute,
-      StatusUpdater updater) {
-
-    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k, false);
-
+  
+  private static FrequentPatternMaxHeap growthBottomUp(FPTree tree,
+                                                       MutableLong minSupportMutable,
+                                                       int k,
+                                                       FPTreeDepthCache treeCache,
+                                                       int level,
+                                                       boolean conditionalOfCurrentAttribute,
+                                                       int currentAttribute,
+                                                       StatusUpdater updater) {
+    
+    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k,
+        false);
+    
     if (conditionalOfCurrentAttribute == false) {
-      int index = Arrays.binarySearch(tree.getHeaderTableAttributes(), currentAttribute);
+      int index = Arrays.binarySearch(tree.getHeaderTableAttributes(),
+        currentAttribute);
       if (index < 0) {
         return frequentPatterns;
       } else {
@@ -350,11 +416,11 @@
         }
       }
     }
-
+    
     if (tree.singlePath()) {
       return generateSinglePathPatterns(tree, k, minSupportMutable);
     }
-
+    
     updater.update("Bottom Up FP Growth");
     for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) {
       int attribute = tree.getAttributeAtIndex(i);
@@ -363,50 +429,60 @@
         continue;
       }
       FPTree conditionalTree = treeCache.getTree(level);
-
+      
       FrequentPatternMaxHeap returnedPatterns;
       if (conditionalOfCurrentAttribute) {
-        traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), minSupportMutable,
-            conditionalTree, tree);
-        returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1,
-            true, currentAttribute, updater);
-
-        frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, attribute, count, true, false);
+        traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
+          minSupportMutable, conditionalTree, tree);
+        returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
+          k, treeCache, level + 1, true, currentAttribute, updater);
+        
+        frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
+          attribute, count, true, false);
       } else {
         if (attribute == currentAttribute) {
-          traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), minSupportMutable,
-              conditionalTree, tree);
-          returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1,
-              true, currentAttribute, updater);
-
-          frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, attribute, count, true, false);
+          traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
+            minSupportMutable, conditionalTree, tree);
+          returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
+            k, treeCache, level + 1, true, currentAttribute, updater);
+          
+          frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
+            attribute, count, true, false);
         } else if (attribute > currentAttribute) {
-          traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), minSupportMutable,
-              conditionalTree, tree);
-          returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1,
-              false, currentAttribute, updater);
-          frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, attribute, count, false, false);
+          traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
+            minSupportMutable, conditionalTree, tree);
+          returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
+            k, treeCache, level + 1, false, currentAttribute, updater);
+          frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
+            attribute, count, false, false);
         }
       }
-
+      
       if (frequentPatterns.isFull()) {
         if (minSupportMutable.intValue() < frequentPatterns.leastSupport()) {
           minSupportMutable.setValue(frequentPatterns.leastSupport());
         }
       }
     }
-
+    
     return frequentPatterns;
   }
-
-  private static FrequentPatternMaxHeap growthTopDown(FPTree tree, MutableLong minSupportMutable, int k,
-      FPTreeDepthCache treeCache, int level, boolean conditionalOfCurrentAttribute, int currentAttribute,
-      StatusUpdater updater) {
-
-    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k, true);
-
+  
+  private static FrequentPatternMaxHeap growthTopDown(FPTree tree,
+                                                      MutableLong minSupportMutable,
+                                                      int k,
+                                                      FPTreeDepthCache treeCache,
+                                                      int level,
+                                                      boolean conditionalOfCurrentAttribute,
+                                                      int currentAttribute,
+                                                      StatusUpdater updater) {
+    
+    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k,
+        true);
+    
     if (conditionalOfCurrentAttribute == false) {
-      int index = Arrays.binarySearch(tree.getHeaderTableAttributes(), currentAttribute);
+      int index = Arrays.binarySearch(tree.getHeaderTableAttributes(),
+        currentAttribute);
       if (index < 0) {
         return frequentPatterns;
       } else {
@@ -417,46 +493,49 @@
         }
       }
     }
-
+    
     if (tree.singlePath()) {
       return generateSinglePathPatterns(tree, k, minSupportMutable);
     }
-
+    
     updater.update("Top Down Growth:");
-
+    
     for (int i = 0; i < tree.getHeaderTableCount(); i++) {
       int attribute = tree.getAttributeAtIndex(i);
       long count = tree.getHeaderSupportCount(attribute);
       if (count < minSupportMutable.longValue()) {
         continue;
       }
-
+      
       FPTree conditionalTree = treeCache.getTree(level);
-
+      
       FrequentPatternMaxHeap returnedPatterns;
       if (conditionalOfCurrentAttribute) {
-        traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), minSupportMutable,
-            conditionalTree, tree);
-
-        returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1,
-            true, currentAttribute, updater);
-        frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, attribute, count, true, true);
-
+        traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
+          minSupportMutable, conditionalTree, tree);
+        
+        returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
+          k, treeCache, level + 1, true, currentAttribute, updater);
+        frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
+          attribute, count, true, true);
+        
       } else {
         if (attribute == currentAttribute) {
-          traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), minSupportMutable,
-              conditionalTree, tree);
-          returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1,
-              true, currentAttribute, updater);
-          frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, attribute, count, true, false);
-
+          traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
+            minSupportMutable, conditionalTree, tree);
+          returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
+            k, treeCache, level + 1, true, currentAttribute, updater);
+          frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
+            attribute, count, true, false);
+          
         } else if (attribute > currentAttribute) {
-          traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), minSupportMutable,
-              conditionalTree, tree);
-          returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1,
-              false, currentAttribute, updater);
-          frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, attribute, count, false, true);
-
+          traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
+            minSupportMutable, conditionalTree, tree);
+          returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
+            k, treeCache, level + 1, false, currentAttribute, updater);
+          frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
+            attribute, count, false, true);
+          
         }
       }
       if (frequentPatterns.isFull()) {
@@ -465,118 +544,128 @@
         }
       }
     }
-
+    
     return frequentPatterns;
   }
-
+  
   private static FrequentPatternMaxHeap mergeHeap(FrequentPatternMaxHeap frequentPatterns,
-      FrequentPatternMaxHeap returnedPatterns, int attribute, long count, boolean addAttribute,
-      boolean subPatternCheck) {
+                                                  FrequentPatternMaxHeap returnedPatterns,
+                                                  int attribute,
+                                                  long count,
+                                                  boolean addAttribute,
+                                                  boolean subPatternCheck) {
     frequentPatterns.addAll(returnedPatterns, attribute, count);
     if (frequentPatterns.addable(count) && addAttribute) {
       Pattern p = new Pattern();
       p.add(attribute, count);
       frequentPatterns.insert(p);
     }
-
+    
     return frequentPatterns;
   }
-
+  
   private static void traverseAndBuildConditionalFPTreeData(int firstConditionalNode,
-      MutableLong minSupportMutable, FPTree conditionalTree, FPTree tree) {
-
+                                                            MutableLong minSupportMutable,
+                                                            FPTree conditionalTree,
+                                                            FPTree tree) {
+    
     // Build Subtable
     int conditionalNode = firstConditionalNode;
-
+    
     while (conditionalNode != -1) {
       long nextNodeCount = tree.count(conditionalNode);
       int pathNode = tree.parent(conditionalNode);
       int prevConditional = -1;
-
+      
       while (pathNode != 0) { // dummy root node
         int attribute = tree.attribute(pathNode);
-        if (tree.getHeaderSupportCount(attribute) < minSupportMutable.intValue()) {
+        if (tree.getHeaderSupportCount(attribute) < minSupportMutable
+            .intValue()) {
           pathNode = tree.parent(pathNode);
           continue;
         }
         // update and increment the headerTable Counts
         conditionalTree.addHeaderCount(attribute, nextNodeCount);
-
+        
         int conditional = tree.conditional(pathNode);
         // if its a new conditional tree node
-
+        
         if (conditional == 0) {
-          tree.setConditional(pathNode, conditionalTree.createConditionalNode(attribute, 0));
+          tree.setConditional(pathNode, conditionalTree.createConditionalNode(
+            attribute, 0));
           conditional = tree.conditional(pathNode);
           conditionalTree.addHeaderNext(attribute, conditional);
         } else {
           conditionalTree.setSinglePath(false);
         }
-
+        
         if (prevConditional != -1) { // if there is a child element
           conditionalTree.setParent(prevConditional, conditional);
         }
-
+        
         conditionalTree.addCount(conditional, nextNodeCount);
         prevConditional = conditional;
-
+        
         pathNode = tree.parent(pathNode);
-
+        
       }
       if (prevConditional != -1) {
         conditionalTree.setParent(prevConditional, FPTree.ROOTNODEID);
-        if (conditionalTree.childCount(FPTree.ROOTNODEID) > 1 && conditionalTree.singlePath()) {
+        if (conditionalTree.childCount(FPTree.ROOTNODEID) > 1
+            && conditionalTree.singlePath()) {
           conditionalTree.setSinglePath(false);
-
+          
         }
       }
       conditionalNode = tree.next(conditionalNode);
     }
-
+    
     tree.clearConditional();
     conditionalTree.reorderHeaderTable();
     pruneFPTree(minSupportMutable, conditionalTree);
     // prune Conditional Tree
-
+    
   }
-
+  
   private static void pruneFPTree(MutableLong minSupportMutable, FPTree tree) {
     for (int i = 0; i < tree.getHeaderTableCount(); i++) {
       int currentAttribute = tree.getAttributeAtIndex(i);
-      if (tree.getHeaderSupportCount(currentAttribute) < minSupportMutable.intValue()) {
+      if (tree.getHeaderSupportCount(currentAttribute) < minSupportMutable
+          .intValue()) {
         int nextNode = tree.getHeaderNext(currentAttribute);
         tree.removeHeaderNext(currentAttribute);
         while (nextNode != -1) {
-
+          
           int mychildCount = tree.childCount(nextNode);
-
+          
           int parentNode = tree.parent(nextNode);
-
+          
           for (int j = 0; j < mychildCount; j++) {
             Integer myChildId = tree.childAtIndex(nextNode, j);
             tree.replaceChild(parentNode, nextNode, myChildId);
           }
           nextNode = tree.next(nextNode);
         }
-
+        
       }
     }
-
+    
     for (int i = 0; i < tree.getHeaderTableCount(); i++) {
       int currentAttribute = tree.getAttributeAtIndex(i);
       int nextNode = tree.getHeaderNext(currentAttribute);
-
+      
       FastByIDMap<Integer> prevNode = new FastByIDMap<Integer>();
       int justPrevNode = -1;
       while (nextNode != -1) {
-
+        
         int parent = tree.parent(nextNode);
-
+        
         if (prevNode.containsKey(parent) == false) {
           prevNode.put(parent, nextNode);
         } else {
           int prevNodeId = prevNode.get(parent);
-          if (1 >= tree.childCount(prevNodeId) && 1 >= tree.childCount(nextNode)) {
+          if (1 >= tree.childCount(prevNodeId)
+              && 1 >= tree.childCount(nextNode)) {
             tree.addCount(prevNodeId, tree.count(nextNode));
             if (tree.childCount(nextNode) == 1) {
               tree.addChild(prevNodeId, tree.childAtIndex(nextNode, 0));
@@ -589,29 +678,38 @@
         nextNode = tree.next(nextNode);
       }
     }
-
+    
     // prune Conditional Tree
-
+    
   }
-
+  
   /**
-   * Create FPTree with node counts incremented by addCount variable given the root node and the List of
-   * Attributes in transaction sorted by support
+   * Create FPTree with node counts incremented by addCount variable given the
+   * root node and the List of Attributes in transaction sorted by support
    * 
-   * @param tree object to which the transaction has to be added to
-   * @param myList List of transactions sorted by support
-   * @param addCount amount by which the Node count has to be incremented
-   * @param minSupport the MutableLong value which contains the current value(dynamic) of support
-   * @param attributeFrequency the list of attributes and their frequency
+   * @param tree
+   *          object to which the transaction has to be added to
+   * @param myList
+   *          List of transactions sorted by support
+   * @param addCount
+   *          amount by which the Node count has to be incremented
+   * @param minSupport
+   *          the MutableLong value which contains the current value(dynamic) of
+   *          support
+   * @param attributeFrequency
+   *          the list of attributes and their frequency
    * @return the number of new nodes added
    */
-  private static int treeAddCount(FPTree tree, int[] myList, long addCount, MutableLong minSupport,
-      long[] attributeFrequency) {
-
+  private static int treeAddCount(FPTree tree,
+                                  int[] myList,
+                                  long addCount,
+                                  MutableLong minSupport,
+                                  long[] attributeFrequency) {
+    
     int temp = FPTree.ROOTNODEID;
     int ret = 0;
     boolean addCountMode = true;
-
+    
     for (int attribute : myList) {
       if (attributeFrequency[attribute] < minSupport.intValue()) {
         return ret;
@@ -632,8 +730,8 @@
         ret++;
       }
     }
-
+    
     return ret;
-
+    
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java Fri Feb  5 11:00:25 2010
@@ -21,86 +21,91 @@
 import java.util.Set;
 import java.util.TreeSet;
 
+/**
+ * The Frequent Pattern Tree datastructure used for mining patterns using
+ * {@link FPGrowth} algorithm
+ * 
+ */
 public class FPTree {
-
+  
   public static final int ROOTNODEID = 0;
-
+  
   private static final int DEFAULT_CHILDREN_INITIAL_SIZE = 2;
-
+  
   private static final int DEFAULT_HEADER_TABLE_INITIAL_SIZE = 4;
-
+  
   private static final int DEFAULT_INITIAL_SIZE = 8;
-
+  
   private static final float GROWTH_RATE = 1.5f;
-
+  
   private static final int HEADERTABLEBLOCKSIZE = 2;
-
+  
   private static final int HT_LAST = 1;
-
+  
   private static final int HT_NEXT = 0;
-
+  
   private int[] attribute;
-
+  
   private int[] childCount;
-
+  
   private int[] conditional;
-
+  
   private long[] headerTableAttributeCount;
-
+  
   private int[] headerTableAttributes;
-
-  private int headerTableCount = 0;
-
+  
+  private int headerTableCount;
+  
   private int[] headerTableLookup;
-
+  
   private int[][] headerTableProperties;
-
+  
   private int[] next;
-
+  
   private int[][] nodeChildren;
-
+  
   private long[] nodeCount;
-
-  private int nodes = 0;
-
+  
+  private int nodes;
+  
   private int[] parent;
-
+  
   private boolean singlePath;
-
+  
   private final Set<Integer> sortedSet = new TreeSet<Integer>();
-
+  
   public FPTree() {
     this(DEFAULT_INITIAL_SIZE, DEFAULT_HEADER_TABLE_INITIAL_SIZE);
   }
-
+  
   public FPTree(int size) {
     this(size, DEFAULT_HEADER_TABLE_INITIAL_SIZE);
   }
-
+  
   private FPTree(int size, int headersize) {
     if (size < DEFAULT_INITIAL_SIZE) {
       size = DEFAULT_INITIAL_SIZE;
     }
-
+    
     parent = new int[size];
     next = new int[size];
     childCount = new int[size];
     attribute = new int[size];
     nodeCount = new long[size];
-
+    
     nodeChildren = new int[size][];
     conditional = new int[size];
-
+    
     headerTableAttributes = new int[DEFAULT_HEADER_TABLE_INITIAL_SIZE];
     headerTableAttributeCount = new long[DEFAULT_HEADER_TABLE_INITIAL_SIZE];
     headerTableLookup = new int[DEFAULT_HEADER_TABLE_INITIAL_SIZE];
     Arrays.fill(headerTableLookup, -1);
     headerTableProperties = new int[DEFAULT_HEADER_TABLE_INITIAL_SIZE][];
-
+    
     singlePath = true;
     createRootNode();
   }
-
+  
   public final void addChild(int parentNodeId, int childnodeId) {
     int length = childCount[parentNodeId];
     if (length >= nodeChildren[parentNodeId].length) {
@@ -108,12 +113,12 @@
     }
     nodeChildren[parentNodeId][length++] = childnodeId;
     childCount[parentNodeId] = length;
-
+    
     if (length > 1 && singlePath) {
       singlePath = false;
     }
   }
-
+  
   public final boolean addCount(int nodeId, long count) {
     if (nodeId < nodes) {
       this.nodeCount[nodeId] += count;
@@ -121,12 +126,12 @@
     }
     return false;
   }
-
+  
   public final void addHeaderCount(int attributeValue, long count) {
     int index = getHeaderIndex(attributeValue);
     headerTableAttributeCount[index] += count;
   }
-
+  
   public final void addHeaderNext(int attributeValue, int nodeId) {
     int index = getHeaderIndex(attributeValue);
     if (headerTableProperties[index][HT_NEXT] == -1) {
@@ -137,22 +142,22 @@
       headerTableProperties[index][HT_LAST] = nodeId;
     }
   }
-
+  
   public final int attribute(int nodeId) {
     return this.attribute[nodeId];
   }
-
+  
   public final int childAtIndex(int nodeId, int index) {
     if (childCount[nodeId] < index) {
       return -1;
     }
     return nodeChildren[nodeId][index];
   }
-
+  
   public final int childCount(int nodeId) {
     return childCount[nodeId];
   }
-
+  
   public final int childWithAttribute(int nodeId, int childAttribute) {
     int length = childCount[nodeId];
     for (int i = 0; i < length; i++) {
@@ -162,7 +167,7 @@
     }
     return -1;
   }
-
+  
   public final void clear() {
     nodes = 0;
     headerTableCount = 0;
@@ -171,21 +176,21 @@
     sortedSet.clear();
     createRootNode();
   }
-
+  
   public final void clearConditional() {
     for (int i = nodes - 1; i >= 0; i--) {
       conditional[i] = 0;
     }
   }
-
+  
   public final int conditional(int nodeId) {
     return this.conditional[nodeId];
   }
-
+  
   public final long count(int nodeId) {
     return nodeCount[nodeId];
   }
-
+  
   public final int createConditionalNode(int attributeValue, long count) {
     if (nodes >= this.attribute.length) {
       resize();
@@ -196,36 +201,36 @@
     conditional[nodes] = 0;
     this.attribute[nodes] = attributeValue;
     nodeCount[nodes] = count;
-
+    
     if (nodeChildren[nodes] == null) {
       nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
     }
-
+    
     return nodes++;
   }
-
+  
   public final int createNode(int parentNodeId, int attributeValue, long count) {
     if (nodes >= this.attribute.length) {
       resize();
     }
-
+    
     childCount[nodes] = 0;
     next[nodes] = -1;
     parent[nodes] = parentNodeId;
     this.attribute[nodes] = attributeValue;
     nodeCount[nodes] = count;
-
+    
     conditional[nodes] = 0;
     if (nodeChildren[nodes] == null) {
       nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
     }
-
+    
     int childNodeId = nodes++;
     addChild(parentNodeId, childNodeId);
     addHeaderNext(attributeValue, childNodeId);
     return childNodeId;
   }
-
+  
   public final int createRootNode() {
     childCount[nodes] = 0;
     next[nodes] = -1;
@@ -237,48 +242,48 @@
     }
     return nodes++;
   }
-
+  
   public final int getAttributeAtIndex(int index) {
     return headerTableAttributes[index];
   }
-
+  
   public final int getHeaderNext(int attributeValue) {
     int index = getHeaderIndex(attributeValue);
     return headerTableProperties[index][HT_NEXT];
   }
-
+  
   public final long getHeaderSupportCount(int attributeValue) {
     int index = getHeaderIndex(attributeValue);
     return headerTableAttributeCount[index];
   }
-
+  
   public final int[] getHeaderTableAttributes() {
     int[] attributes = new int[headerTableCount];
     System.arraycopy(headerTableAttributes, 0, attributes, 0, headerTableCount);
     return attributes;
   }
-
+  
   public final int getHeaderTableCount() {
     return headerTableCount;
   }
-
+  
   public final boolean isEmpty() {
     return nodes <= 1;
   }
-
+  
   public final int next(int nodeId) {
     return next[nodeId];
   }
-
+  
   public final int parent(int nodeId) {
     return parent[nodeId];
   }
-
+  
   public final void removeHeaderNext(int attributeValue) {
     int index = getHeaderIndex(attributeValue);
     headerTableProperties[index][HT_NEXT] = -1;
   }
-
+  
   public final void reorderHeaderTable() {
     // Arrays.sort(headerTableAttributes, 0, headerTableCount);
     int i = 0;
@@ -286,7 +291,7 @@
       headerTableAttributes[i++] = attr;
     }
   }
-
+  
   public void replaceChild(int parentNodeId, int replacableNode, int childnodeId) {
     for (int i = 0, j = childCount[parentNodeId]; i < j; i++) {
       if (nodeChildren[parentNodeId][i] == replacableNode) {
@@ -295,7 +300,7 @@
       }
     }
   }
-
+  
   public final boolean setConditional(int nodeId, int conditionalNode) {
     if (nodeId < nodes) {
       this.conditional[nodeId] = conditionalNode;
@@ -303,7 +308,7 @@
     }
     return false;
   }
-
+  
   public final boolean setNext(int nodeId, int nextNode) {
     if (nodeId < nodes) {
       this.next[nodeId] = nextNode;
@@ -311,11 +316,11 @@
     }
     return false;
   }
-
+  
   public final boolean setParent(int nodeId, int parentNode) {
     if (nodeId < nodes) {
       this.parent[nodeId] = parentNode;
-
+      
       int length = childCount[parentNode];
       if (length >= nodeChildren[parentNode].length) {
         resizeChildren(parentNode);
@@ -326,15 +331,15 @@
     }
     return false;
   }
-
+  
   public final void setSinglePath(boolean bit) {
     singlePath = bit;
   }
-
+  
   public final boolean singlePath() {
     return singlePath;
   }
-
+  
   private int getHeaderIndex(int attributeValue) {
     if (attributeValue >= headerTableLookup.length) {
       resizeHeaderLookup(attributeValue);
@@ -357,13 +362,13 @@
     }
     return index;
   }
-
+  
   private void resize() {
     int size = (int) (GROWTH_RATE * nodes);
     if (size < DEFAULT_INITIAL_SIZE) {
       size = DEFAULT_INITIAL_SIZE;
     }
-
+    
     int[] oldChildCount = childCount;
     int[] oldAttribute = attribute;
     long[] oldnodeCount = nodeCount;
@@ -371,16 +376,16 @@
     int[] oldNext = next;
     int[][] oldNodeChildren = nodeChildren;
     int[] oldConditional = conditional;
-
+    
     childCount = new int[size];
     attribute = new int[size];
     nodeCount = new long[size];
     parent = new int[size];
     next = new int[size];
-
+    
     nodeChildren = new int[size][];
     conditional = new int[size];
-
+    
     System.arraycopy(oldChildCount, 0, this.childCount, 0, nodes);
     System.arraycopy(oldAttribute, 0, this.attribute, 0, nodes);
     System.arraycopy(oldnodeCount, 0, this.nodeCount, 0, nodes);
@@ -389,7 +394,7 @@
     System.arraycopy(oldNodeChildren, 0, this.nodeChildren, 0, nodes);
     System.arraycopy(oldConditional, 0, this.conditional, 0, nodes);
   }
-
+  
   private void resizeChildren(int nodeId) {
     int length = childCount[nodeId];
     int size = (int) (GROWTH_RATE * length);
@@ -400,7 +405,7 @@
     nodeChildren[nodeId] = new int[size];
     System.arraycopy(oldNodeChildren, 0, this.nodeChildren[nodeId], 0, length);
   }
-
+  
   private void resizeHeaderLookup(int attributeValue) {
     int size = (int) (attributeValue * GROWTH_RATE);
     int[] oldLookup = headerTableLookup;
@@ -408,13 +413,13 @@
     Arrays.fill(headerTableLookup, oldLookup.length, size, -1);
     System.arraycopy(oldLookup, 0, this.headerTableLookup, 0, oldLookup.length);
   }
-
+  
   private void resizeHeaderTable() {
     int size = (int) (GROWTH_RATE * headerTableCount);
     if (size < DEFAULT_HEADER_TABLE_INITIAL_SIZE) {
       size = DEFAULT_HEADER_TABLE_INITIAL_SIZE;
     }
-
+    
     int[] oldAttributes = headerTableAttributes;
     long[] oldAttributeCount = headerTableAttributeCount;
     int[][] oldProperties = headerTableProperties;
@@ -422,10 +427,10 @@
     headerTableAttributeCount = new long[size];
     headerTableProperties = new int[size][];
     System.arraycopy(oldAttributes, 0, this.headerTableAttributes, 0,
-                     headerTableCount);
+      headerTableCount);
     System.arraycopy(oldAttributeCount, 0, this.headerTableAttributeCount, 0,
-                     headerTableCount);
+      headerTableCount);
     System.arraycopy(oldProperties, 0, this.headerTableProperties, 0,
-                     headerTableCount);
+      headerTableCount);
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTreeDepthCache.java Fri Feb  5 11:00:25 2010
@@ -17,40 +17,47 @@
 
 package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
 
-import org.apache.mahout.common.cache.LeastKCache;
-
 import java.util.ArrayList;
+import java.util.List;
 
+import org.apache.mahout.common.cache.LeastKCache;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+/**
+ * Caches large FPTree {@link Object} for each level of the recursive
+ * {@link FPGrowth} algorithm to reduce allocation overhead.
+ * 
+ */
 public class FPTreeDepthCache {
-
+  
   private static int firstLevelCacheSize = 5;
-
-  private static final Logger log = LoggerFactory.getLogger(FPTreeDepthCache.class);
-
-  private final LeastKCache<Integer, FPTree> firstLevelCache = new LeastKCache<Integer, FPTree>(
-      firstLevelCacheSize);
-
-  private int hits = 0;
-
-  private int misses = 0;
-
-  private final ArrayList<FPTree> treeCache = new ArrayList<FPTree>();
-
+  
+  private static final Logger log = LoggerFactory.getLogger(
+      FPTreeDepthCache.class);
+  
+  private final LeastKCache<Integer,FPTree> firstLevelCache 
+      = new LeastKCache<Integer,FPTree>(firstLevelCacheSize);
+  
+  private int hits;
+  
+  private int misses;
+  
+  private final List<FPTree> treeCache = new ArrayList<FPTree>();
+  
   public FPTreeDepthCache() {
-    log.info("Initializing FPTreeCache with firstLevelCacheSize: {}", firstLevelCacheSize);
+    log.info("Initializing FPTreeCache with firstLevelCacheSize: {}",
+      firstLevelCacheSize);
   }
   
   public static int getFirstLevelCacheSize() {
     return firstLevelCacheSize;
   }
-
+  
   public static void setFirstLevelCacheSize(int firstLevelCacheSize) {
     FPTreeDepthCache.firstLevelCacheSize = firstLevelCacheSize;
   }
-
+  
   public final FPTree getFirstLevelTree(int attr) {
     Integer attribute = attr;
     if (firstLevelCache.contains(attribute)) {
@@ -63,15 +70,15 @@
       return conditionalTree;
     }
   }
-
+  
   public final int getHits() {
     return hits;
   }
-
+  
   public final int getMisses() {
     return misses;
   }
-
+  
   public final FPTree getTree(int level) {
     while (treeCache.size() < level + 1) {
       FPTree cTree = new FPTree();
@@ -81,5 +88,5 @@
     conditionalTree.clear();
     return conditionalTree;
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java Fri Feb  5 11:00:25 2010
@@ -25,24 +25,24 @@
 
 /** {@link FrequentPatternMaxHeap} keeps top K Attributes in a TreeSet */
 public final class FrequentPatternMaxHeap {
-
-  private int count = 0;
-
-  private Pattern least = null;
-
-  private int maxSize = 0;
-
-  private boolean subPatternCheck = false;
-
-  private Map<Long, Set<Pattern>> patternIndex = null;
-
-  private PriorityQueue<Pattern> queue = null;
-
+  
+  private int count;
+  
+  private Pattern least;
+  
+  private int maxSize;
+  
+  private boolean subPatternCheck;
+  
+  private Map<Long,Set<Pattern>> patternIndex;
+  
+  private PriorityQueue<Pattern> queue;
+  
   public FrequentPatternMaxHeap(int numResults, boolean subPatternCheck) {
     maxSize = numResults;
     queue = new PriorityQueue<Pattern>(maxSize);
     this.subPatternCheck = subPatternCheck;
-    patternIndex = new HashMap<Long, Set<Pattern>>();
+    patternIndex = new HashMap<Long,Set<Pattern>>();
     for (Pattern p : queue) {
       Long index = p.support();
       Set<Pattern> patternList;
@@ -52,22 +52,22 @@
       }
       patternList = patternIndex.get(index);
       patternList.add(p);
-
+      
     }
   }
-
+  
   public boolean addable(long support) {
     if (count < maxSize) {
       return true;
     }
     return least.support() <= support;
   }
-
+  
   public PriorityQueue<Pattern> getHeap() {
     if (subPatternCheck) {
       PriorityQueue<Pattern> ret = new PriorityQueue<Pattern>(maxSize);
       for (Pattern p : queue) {
-
+        
         if (patternIndex.get(p.support()).contains(p)) {
           ret.add(p);
         }
@@ -77,8 +77,10 @@
       return queue;
     }
   }
-
-  public void addAll(FrequentPatternMaxHeap patterns, int attribute, long attributeSupport) {
+  
+  public void addAll(FrequentPatternMaxHeap patterns,
+                     int attribute,
+                     long attributeSupport) {
     for (Pattern pattern : patterns.getHeap()) {
       long support = Math.min(attributeSupport, pattern.support());
       if (this.addable(support)) {
@@ -87,11 +89,11 @@
       }
     }
   }
-
+  
   public void insert(Pattern frequentPattern) {
     if (frequentPattern.length() == 0) {
       return;
-      }
+    }
     
     if (count == maxSize) {
       if (frequentPattern.compareTo(least) > 0) {
@@ -101,7 +103,7 @@
           if (subPatternCheck) {
             patternIndex.get(evictedItem.support()).remove(evictedItem);
           }
-
+          
         }
       }
     } else {
@@ -117,22 +119,22 @@
       }
     }
   }
-
+  
   public int count() {
     return count;
   }
-
+  
   public boolean isFull() {
     return count == maxSize;
   }
-
+  
   public long leastSupport() {
     if (least == null) {
       return 0;
     }
     return least.support();
   }
-
+  
   private boolean addPattern(Pattern frequentPattern) {
     if (subPatternCheck == false) {
       queue.add(frequentPattern);
@@ -154,7 +156,8 @@
         }
         if (replace) {
           indexSet.remove(replacablePattern);
-          if (indexSet.contains(frequentPattern) == false && queue.add(frequentPattern)) {
+          if (indexSet.contains(frequentPattern) == false
+              && queue.add(frequentPattern)) {
             
             indexSet.add(frequentPattern);
           }
@@ -172,7 +175,7 @@
         }
         patternList = patternIndex.get(index);
         patternList.add(frequentPattern);
-
+        
         return true;
       }
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java Fri Feb  5 11:00:25 2010
@@ -19,28 +19,33 @@
 
 import java.util.Arrays;
 
+/**
+ * A {@link Pattern} in FPGrowth is a list of items (here int) and the
+ * support(the number of times the pattern is seen in the dataset)
+ * 
+ */
 public class Pattern implements Comparable<Pattern> {
-
+  
   private static final int DEFAULT_INITIAL_SIZE = 2;
-
+  
   private static final float GROWTH_RATE = 1.5f;
-
+  
   private boolean dirty = true;
-
+  
   private int hashCode;
-
-  private int length = 0;
-
+  
+  private int length;
+  
   private int[] pattern;
-
+  
   private long support = Long.MAX_VALUE;
-
+  
   private long[] supportValues;
-
+  
   public Pattern() {
     this(DEFAULT_INITIAL_SIZE);
   }
-
+  
   private Pattern(int size) {
     if (size < DEFAULT_INITIAL_SIZE) {
       size = DEFAULT_INITIAL_SIZE;
@@ -49,7 +54,7 @@
     this.supportValues = new long[size];
     dirty = true;
   }
-
+  
   public final void add(int id, long supportCount) {
     if (length >= pattern.length) {
       resize();
@@ -59,7 +64,7 @@
     this.support = (supportCount > this.support) ? this.support : supportCount;
     dirty = true;
   }
-
+  
   @Override
   public boolean equals(Object obj) {
     if (this == obj) {
@@ -80,15 +85,15 @@
     }
     return Arrays.equals(pattern, other.pattern);
   }
-
+  
   public final int[] getPattern() {
     return this.pattern;
   }
-
+  
   public final Object[] getPatternWithSupport() {
     return new Object[] {this.pattern, this.supportValues};
   }
-
+  
   @Override
   public int hashCode() {
     if (dirty == false) {
@@ -101,7 +106,7 @@
     hashCode = result;
     return result;
   }
-
+  
   public final boolean isSubPatternOf(Pattern frequentPattern) {
     int[] otherPattern = frequentPattern.getPattern();
     int otherLength = frequentPattern.length();
@@ -122,22 +127,22 @@
     }
     return otherI != otherLength || i == length;
   }
-
+  
   public final int length() {
     return this.length;
   }
-
+  
   public final long support() {
     return this.support;
   }
-
+  
   @Override
   public final String toString() {
     int[] arr = new int[length];
     System.arraycopy(pattern, 0, arr, 0, length);
     return Arrays.toString(arr) + '-' + support;
   }
-
+  
   private void resize() {
     int size = (int) (GROWTH_RATE * length);
     if (size < DEFAULT_INITIAL_SIZE) {
@@ -150,7 +155,7 @@
     System.arraycopy(oldpattern, 0, this.pattern, 0, length);
     System.arraycopy(oldSupport, 0, this.supportValues, 0, length);
   }
-
+  
   @Override
   public int compareTo(Pattern cr2) {
     long support2 = cr2.support();
@@ -170,5 +175,5 @@
       }
     }
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java Fri Feb  5 11:00:25 2010
@@ -34,7 +34,7 @@
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.fpm.pfpgrowth.convertors.ContextStatusUpdater;
 import org.apache.mahout.fpm.pfpgrowth.convertors.SequenceFileOutputCollector;
-import org.apache.mahout.fpm.pfpgrowth.convertors.string.StringOutputConvertor;
+import org.apache.mahout.fpm.pfpgrowth.convertors.string.StringOutputConverter;
 import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
 import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
 
@@ -64,7 +64,7 @@
     SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class,
         TopKStringPatterns.class);
     fp.generateTopKFrequentPatterns(transactions.iterator(), fp.generateFList(transactions.iterator(), 3),
-        3, 100, new HashSet<String>(), new StringOutputConvertor(
+        3, 100, new HashSet<String>(), new StringOutputConverter(
             new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
         new ContextStatusUpdater(null));
     writer.close();

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthJob.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthJob.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthJob.java Fri Feb  5 11:00:25 2010
@@ -45,7 +45,7 @@
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.fpm.pfpgrowth.convertors.ContextStatusUpdater;
 import org.apache.mahout.fpm.pfpgrowth.convertors.SequenceFileOutputCollector;
-import org.apache.mahout.fpm.pfpgrowth.convertors.string.StringOutputConvertor;
+import org.apache.mahout.fpm.pfpgrowth.convertors.string.StringOutputConverter;
 import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
 import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
 import org.slf4j.Logger;
@@ -201,7 +201,7 @@
     fp.generateTopKFrequentPatterns(new StringRecordIterator(new FileLineIterable(new File(input),
         encoding, false), pattern), fp.generateFList(new StringRecordIterator(new FileLineIterable(
             new File(input), encoding, false), pattern), minSupport), minSupport, maxHeapSize, features,
-        new StringOutputConvertor(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
+        new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
         new ContextStatusUpdater(null));
     writer.close();
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleMapper.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleMapper.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleMapper.java Fri Feb  5 11:00:25 2010
@@ -30,19 +30,28 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class KeyBasedStringTupleMapper extends Mapper<LongWritable, Text, Text, StringTuple> {
-
-  private static final Logger log = LoggerFactory.getLogger(KeyBasedStringTupleMapper.class);
-
-  private Pattern splitter = null;
-
-  private int[] selectedFields = null;
-
-  private int[] groupingFields = null;
-
+/**
+ * Splits the line using a {@link Pattern} and outputs key as given by the
+ * groupingFields
+ * 
+ */
+public class KeyBasedStringTupleMapper extends
+    Mapper<LongWritable,Text,Text,StringTuple> {
+  
+  private static final Logger log = LoggerFactory
+      .getLogger(KeyBasedStringTupleMapper.class);
+  
+  private Pattern splitter;
+  
+  private int[] selectedFields;
+  
+  private int[] groupingFields;
+  
   @Override
-  protected void map(LongWritable key, Text value, Context context) throws IOException,
-      InterruptedException {
+  protected void map(LongWritable key,
+                     Text value,
+                     Context context) throws IOException,
+                                     InterruptedException {
     String[] fields = splitter.split(value.toString());
     if (fields.length != 4) {
       log.info("{} {}", fields.length, value.toString());
@@ -50,37 +59,43 @@
       return;
     }
     List<String> oKey = new ArrayList<String>();
-    for (int i = 0, groupingFieldCount = groupingFields.length; i < groupingFieldCount; i++) {
+    for (int i = 0, groupingFieldCount = groupingFields.length;
+        i < groupingFieldCount; i++) {
       oKey.add(fields[groupingFields[i]]);
       context.setStatus(fields[groupingFields[i]]);
     }
-
+    
     List<String> oValue = new ArrayList<String>();
-    for (int i = 0, selectedFieldCount = selectedFields.length; i < selectedFieldCount; i++) {
+    for (int i = 0, selectedFieldCount = selectedFields.length;
+        i < selectedFieldCount; i++) {
       oValue.add(fields[selectedFields[i]]);
     }
-
+    
     context.write(new Text(oKey.toString()), new StringTuple(oValue));
-
+    
   }
-
+  
   @Override
-  protected void setup(Context context) throws IOException, InterruptedException {
+  protected void setup(Context context) throws IOException,
+                                       InterruptedException {
     super.setup(context);
-    Parameters params = Parameters.fromString(context.getConfiguration().get("job.parameters", ""));
+    Parameters params = Parameters.fromString(
+        context.getConfiguration().get("job.parameters", ""));
     splitter = Pattern.compile(params.get("splitPattern", "[ \t]*\t[ \t]*"));
-
-    int selectedFieldCount = Integer.valueOf(params.get("selectedFieldCount", "0"));
+    
+    int selectedFieldCount = Integer.valueOf(
+        params.get("selectedFieldCount", "0"));
     selectedFields = new int[selectedFieldCount];
     for (int i = 0; i < selectedFieldCount; i++) {
       selectedFields[i] = Integer.valueOf(params.get("field" + i, "0"));
     }
-
-    int groupingFieldCount = Integer.valueOf(params.get("groupingFieldCount", "0"));
+    
+    int groupingFieldCount = Integer.valueOf(
+        params.get("groupingFieldCount", "0"));
     groupingFields = new int[groupingFieldCount];
     for (int i = 0; i < groupingFieldCount; i++) {
       groupingFields[i] = Integer.valueOf(params.get("gfield" + i, "0"));
     }
-
+    
   }
 }

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleReducer.java?rev=906896&r1=906895&r2=906896&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleReducer.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleReducer.java Fri Feb  5 11:00:25 2010
@@ -26,15 +26,18 @@
 import org.apache.mahout.common.Parameters;
 import org.apache.mahout.common.StringTuple;
 
-public class KeyBasedStringTupleReducer extends Reducer<Text, StringTuple, Text, Text> {
-
+public class KeyBasedStringTupleReducer extends
+    Reducer<Text,StringTuple,Text,Text> {
+  
   private int maxTransactionLength = 100;
-
+  
   @Override
-  protected void reduce(Text key, Iterable<StringTuple> values, Context context) throws IOException,
-      InterruptedException {
+  protected void reduce(Text key,
+                        Iterable<StringTuple> values,
+                        Context context) throws IOException,
+                                        InterruptedException {
     Set<String> items = new HashSet<String>();
-
+    
     for (StringTuple value : values) {
       for (String field : value.getEntries()) {
         items.add(field);
@@ -52,23 +55,26 @@
           sb.replace(0, sb.length(), "");
           sep = "";
         }
-
+        
         sb.append(sep).append(field);
         sep = "\t";
-
+        
         i++;
-
+        
       }
       if (sb.length() > 0) {
         context.write(null, new Text(sb.toString()));
       }
     }
   }
-
+  
   @Override
-  protected void setup(Context context) throws IOException, InterruptedException {
+  protected void setup(Context context) throws IOException,
+                                       InterruptedException {
     super.setup(context);
-    Parameters params = Parameters.fromString(context.getConfiguration().get("job.parameters", ""));
-    maxTransactionLength = Integer.valueOf(params.get("maxTransactionLength", "100"));
+    Parameters params = Parameters.fromString(context.getConfiguration().get(
+      "job.parameters", ""));
+    maxTransactionLength = Integer.valueOf(params.get("maxTransactionLength",
+      "100"));
   }
 }