You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/03/10 00:48:05 UTC

svn commit: r921188 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java

Author: srowen
Date: Tue Mar  9 23:48:05 2010
New Revision: 921188

URL: http://svn.apache.org/viewvc?rev=921188&view=rev
Log:
Adjust Pattern's compareTo() / hashCode() behavior to ensure correctness and streamline a little

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java?rev=921188&r1=921187&r2=921188&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java Tue Mar  9 23:48:05 2010
@@ -17,6 +17,8 @@
 
 package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
 
+import org.apache.mahout.common.RandomUtils;
+
 import java.util.Arrays;
 
 /**
@@ -56,57 +58,23 @@ public class Pattern implements Comparab
   }
   
   public final void add(int id, long supportCount) {
+    dirty = true;
     if (length >= pattern.length) {
       resize();
     }
     this.pattern[length] = id;
     this.supportValues[length++] = supportCount;
     this.support = supportCount > this.support ? this.support : supportCount;
-    dirty = true;
-  }
-  
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj) {
-      return true;
-    }
-    if (obj == null) {
-      return false;
-    }
-    if (getClass() != obj.getClass()) {
-      return false;
-    }
-    Pattern other = (Pattern) obj;
-    if (length != other.length) {
-      return false;
-    }
-    if (support != other.support) {
-      return false;
-    }
-    return Arrays.equals(pattern, other.pattern);
   }
-  
+
   public final int[] getPattern() {
     return this.pattern;
   }
-  
+
   public final Object[] getPatternWithSupport() {
     return new Object[] {this.pattern, this.supportValues};
   }
-  
-  @Override
-  public int hashCode() {
-    if (dirty == false) {
-      return hashCode;
-    }
-    int result = 1;
-    int prime = 31;
-    result = prime * result + Arrays.hashCode(pattern);
-    result = prime * result + Long.valueOf(support).hashCode();
-    hashCode = result;
-    return result;
-  }
-  
+
   public final boolean isSubPatternOf(Pattern frequentPattern) {
     int[] otherPattern = frequentPattern.getPattern();
     int otherLength = frequentPattern.length();
@@ -127,22 +95,15 @@ public class Pattern implements Comparab
     }
     return otherI != otherLength || i == length;
   }
-  
+
   public final int length() {
     return this.length;
   }
-  
+
   public final long support() {
     return this.support;
   }
-  
-  @Override
-  public final String toString() {
-    int[] arr = new int[length];
-    System.arraycopy(pattern, 0, arr, 0, length);
-    return Arrays.toString(arr) + '-' + support;
-  }
-  
+
   private void resize() {
     int size = (int) (GROWTH_RATE * length);
     if (size < DEFAULT_INITIAL_SIZE) {
@@ -157,22 +118,53 @@ public class Pattern implements Comparab
   }
   
   @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null) {
+      return false;
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    Pattern other = (Pattern) obj;
+    return length == other.length && support == other.support && Arrays.equals(pattern, other.pattern);
+  }
+  
+  @Override
+  public int hashCode() {
+    if (dirty == false) {
+      return hashCode;
+    }
+    int result = Arrays.hashCode(pattern);
+    result = 31 * result + RandomUtils.hashLong(support);
+    result = 31 * result + length;
+    hashCode = result;
+    return result;
+  }
+  
+  @Override
+  public final String toString() {
+    int[] arr = new int[length];
+    System.arraycopy(pattern, 0, arr, 0, length);
+    return Arrays.toString(arr) + '-' + support;
+  }
+  
+  @Override
   public int compareTo(Pattern cr2) {
     long support2 = cr2.support();
     int length2 = cr2.length();
     if (support == support2) {
-      if (length == length2) {
-        // if they are of same length and support order randomly
+      if (length < length2) {
+        return -1;
+      } else if (length > length2) {
         return 1;
       } else {
-        return length - length2;
+        return 0;
       }
     } else {
-      if (support > support2) {
-        return 1;
-      } else {
-        return -1;
-      }
+      return support > support2 ? 1 : -1;
     }
   }
   

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java?rev=921188&r1=921187&r2=921188&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java Tue Mar  9 23:48:05 2010
@@ -44,15 +44,15 @@ public class FPGrowthTest extends Mahout
 
     FPGrowth<String> fp = new FPGrowth<String>();
 
-    Collection<Pair<List<String>, Long>> transactions = new ArrayList<Pair<List<String>, Long>>();
-    transactions.add(new Pair<List<String>, Long>(Arrays.asList("E", "A", "D", "B"), 1L));
-    transactions.add(new Pair<List<String>, Long>(Arrays.asList("D", "A", "C", "E", "B"), 1L));
-    transactions.add(new Pair<List<String>, Long>(Arrays.asList("C", "A", "B", "E"), 1L));
-    transactions.add(new Pair<List<String>, Long>(Arrays.asList("B", "A", "D"), 1L));
-    transactions.add(new Pair<List<String>, Long>(Arrays.asList("D"), 1L));
-    transactions.add(new Pair<List<String>, Long>(Arrays.asList("D", "B"), 1L));
-    transactions.add(new Pair<List<String>, Long>(Arrays.asList("A", "D", "E"), 1L));
-    transactions.add(new Pair<List<String>, Long>(Arrays.asList("B", "C"), 1L));
+    Collection<Pair<List<String>,Long>> transactions = new ArrayList<Pair<List<String>,Long>>();
+    transactions.add(new Pair<List<String>,Long>(Arrays.asList("E", "A", "D", "B"), 1L));
+    transactions.add(new Pair<List<String>,Long>(Arrays.asList("D", "A", "C", "E", "B"), 1L));
+    transactions.add(new Pair<List<String>,Long>(Arrays.asList("C", "A", "B", "E"), 1L));
+    transactions.add(new Pair<List<String>,Long>(Arrays.asList("B", "A", "D"), 1L));
+    transactions.add(new Pair<List<String>,Long>(Arrays.asList("D"), 1L));
+    transactions.add(new Pair<List<String>,Long>(Arrays.asList("D", "B"), 1L));
+    transactions.add(new Pair<List<String>,Long>(Arrays.asList("A", "D", "E"), 1L));
+    transactions.add(new Pair<List<String>,Long>(Arrays.asList("B", "C"), 1L));
 
     File tmpFile = File.createTempFile("fpgrowthTest", ".dat");
     tmpFile.deleteOnExit();
@@ -61,20 +61,27 @@ public class FPGrowthTest extends Mahout
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(conf);
 
-    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class,
-        TopKStringPatterns.class);
-    fp.generateTopKFrequentPatterns(transactions.iterator(), fp.generateFList(transactions.iterator(), 3),
-        3, 100, new HashSet<String>(), new StringOutputConverter(
-            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
+
+    SequenceFile.Writer writer =
+        new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);
+    fp.generateTopKFrequentPatterns(
+        transactions.iterator(),
+        fp.generateFList(transactions.iterator(), 3),
+        3,
+        100,
+        new HashSet<String>(),
+        new StringOutputConverter(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)),
         new ContextStatusUpdater(null));
     writer.close();
 
     List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(fs, conf, path);
     assertEquals(
-        "[(C,([B, C],3)), (E,([A, E],4), ([A, B, E],3), ([A, D, E],3)), (A,([A],5), ([A, B],4), ([A, E],4),"
-            + " ([A, D],4), ([A, D, E],3), ([A, B, E],3), ([A, B, D],3)), (D,([D],6), ([A, D],4), ([B, D],4),"
-            + " ([A, D, E],3), ([A, B, D],3)), (B,([B],6), ([A, B],4), ([B, D],4), ([A, B, E],3),"
-            + " ([A, B, D],3), ([B, C],3))]", frequentPatterns.toString());
+      "[(C,([B, C],3)), "
+          + "(E,([A, E],4), ([A, B, E],3), ([A, D, E],3)), "
+          + "(A,([A],5), ([A, D],4), ([A, E],4), ([A, B],4), ([A, B, E],3), ([A, D, E],3), ([A, B, D],3)), "
+          + "(D,([D],6), ([B, D],4), ([A, D],4), ([A, D, E],3), ([A, B, D],3)), "
+          + "(B,([B],6), ([A, B],4), ([B, D],4), ([A, B, D],3), ([A, B, E],3), ([B, C],3))]",
+      frequentPatterns.toString());
 
   }
 }