You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/03/10 00:48:05 UTC
svn commit: r921188 - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java
Author: srowen
Date: Tue Mar 9 23:48:05 2010
New Revision: 921188
URL: http://svn.apache.org/viewvc?rev=921188&view=rev
Log:
Adjust Pattern's compareTo() / hashCode() behavior to ensure correctness and streamline a little
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java?rev=921188&r1=921187&r2=921188&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java Tue Mar 9 23:48:05 2010
@@ -17,6 +17,8 @@
package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
+import org.apache.mahout.common.RandomUtils;
+
import java.util.Arrays;
/**
@@ -56,57 +58,23 @@ public class Pattern implements Comparab
}
public final void add(int id, long supportCount) {
+ dirty = true;
if (length >= pattern.length) {
resize();
}
this.pattern[length] = id;
this.supportValues[length++] = supportCount;
this.support = supportCount > this.support ? this.support : supportCount;
- dirty = true;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj) {
- return true;
- }
- if (obj == null) {
- return false;
- }
- if (getClass() != obj.getClass()) {
- return false;
- }
- Pattern other = (Pattern) obj;
- if (length != other.length) {
- return false;
- }
- if (support != other.support) {
- return false;
- }
- return Arrays.equals(pattern, other.pattern);
}
-
+
public final int[] getPattern() {
return this.pattern;
}
-
+
public final Object[] getPatternWithSupport() {
return new Object[] {this.pattern, this.supportValues};
}
-
- @Override
- public int hashCode() {
- if (dirty == false) {
- return hashCode;
- }
- int result = 1;
- int prime = 31;
- result = prime * result + Arrays.hashCode(pattern);
- result = prime * result + Long.valueOf(support).hashCode();
- hashCode = result;
- return result;
- }
-
+
public final boolean isSubPatternOf(Pattern frequentPattern) {
int[] otherPattern = frequentPattern.getPattern();
int otherLength = frequentPattern.length();
@@ -127,22 +95,15 @@ public class Pattern implements Comparab
}
return otherI != otherLength || i == length;
}
-
+
public final int length() {
return this.length;
}
-
+
public final long support() {
return this.support;
}
-
- @Override
- public final String toString() {
- int[] arr = new int[length];
- System.arraycopy(pattern, 0, arr, 0, length);
- return Arrays.toString(arr) + '-' + support;
- }
-
+
private void resize() {
int size = (int) (GROWTH_RATE * length);
if (size < DEFAULT_INITIAL_SIZE) {
@@ -157,22 +118,53 @@ public class Pattern implements Comparab
}
@Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ Pattern other = (Pattern) obj;
+ return length == other.length && support == other.support && Arrays.equals(pattern, other.pattern);
+ }
+
+ @Override
+ public int hashCode() {
+ if (dirty == false) {
+ return hashCode;
+ }
+ int result = Arrays.hashCode(pattern);
+ result = 31 * result + RandomUtils.hashLong(support);
+ result = 31 * result + length;
+ hashCode = result;
+ return result;
+ }
+
+ @Override
+ public final String toString() {
+ int[] arr = new int[length];
+ System.arraycopy(pattern, 0, arr, 0, length);
+ return Arrays.toString(arr) + '-' + support;
+ }
+
+ @Override
public int compareTo(Pattern cr2) {
long support2 = cr2.support();
int length2 = cr2.length();
if (support == support2) {
- if (length == length2) {
- // if they are of same length and support order randomly
+ if (length < length2) {
+ return -1;
+ } else if (length > length2) {
return 1;
} else {
- return length - length2;
+ return 0;
}
} else {
- if (support > support2) {
- return 1;
- } else {
- return -1;
- }
+ return support > support2 ? 1 : -1;
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java?rev=921188&r1=921187&r2=921188&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthTest.java Tue Mar 9 23:48:05 2010
@@ -44,15 +44,15 @@ public class FPGrowthTest extends Mahout
FPGrowth<String> fp = new FPGrowth<String>();
- Collection<Pair<List<String>, Long>> transactions = new ArrayList<Pair<List<String>, Long>>();
- transactions.add(new Pair<List<String>, Long>(Arrays.asList("E", "A", "D", "B"), 1L));
- transactions.add(new Pair<List<String>, Long>(Arrays.asList("D", "A", "C", "E", "B"), 1L));
- transactions.add(new Pair<List<String>, Long>(Arrays.asList("C", "A", "B", "E"), 1L));
- transactions.add(new Pair<List<String>, Long>(Arrays.asList("B", "A", "D"), 1L));
- transactions.add(new Pair<List<String>, Long>(Arrays.asList("D"), 1L));
- transactions.add(new Pair<List<String>, Long>(Arrays.asList("D", "B"), 1L));
- transactions.add(new Pair<List<String>, Long>(Arrays.asList("A", "D", "E"), 1L));
- transactions.add(new Pair<List<String>, Long>(Arrays.asList("B", "C"), 1L));
+ Collection<Pair<List<String>,Long>> transactions = new ArrayList<Pair<List<String>,Long>>();
+ transactions.add(new Pair<List<String>,Long>(Arrays.asList("E", "A", "D", "B"), 1L));
+ transactions.add(new Pair<List<String>,Long>(Arrays.asList("D", "A", "C", "E", "B"), 1L));
+ transactions.add(new Pair<List<String>,Long>(Arrays.asList("C", "A", "B", "E"), 1L));
+ transactions.add(new Pair<List<String>,Long>(Arrays.asList("B", "A", "D"), 1L));
+ transactions.add(new Pair<List<String>,Long>(Arrays.asList("D"), 1L));
+ transactions.add(new Pair<List<String>,Long>(Arrays.asList("D", "B"), 1L));
+ transactions.add(new Pair<List<String>,Long>(Arrays.asList("A", "D", "E"), 1L));
+ transactions.add(new Pair<List<String>,Long>(Arrays.asList("B", "C"), 1L));
File tmpFile = File.createTempFile("fpgrowthTest", ".dat");
tmpFile.deleteOnExit();
@@ -61,20 +61,27 @@ public class FPGrowthTest extends Mahout
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class,
- TopKStringPatterns.class);
- fp.generateTopKFrequentPatterns(transactions.iterator(), fp.generateFList(transactions.iterator(), 3),
- 3, 100, new HashSet<String>(), new StringOutputConverter(
- new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
+
+ SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);
+ fp.generateTopKFrequentPatterns(
+ transactions.iterator(),
+ fp.generateFList(transactions.iterator(), 3),
+ 3,
+ 100,
+ new HashSet<String>(),
+ new StringOutputConverter(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)),
new ContextStatusUpdater(null));
writer.close();
List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(fs, conf, path);
assertEquals(
- "[(C,([B, C],3)), (E,([A, E],4), ([A, B, E],3), ([A, D, E],3)), (A,([A],5), ([A, B],4), ([A, E],4),"
- + " ([A, D],4), ([A, D, E],3), ([A, B, E],3), ([A, B, D],3)), (D,([D],6), ([A, D],4), ([B, D],4),"
- + " ([A, D, E],3), ([A, B, D],3)), (B,([B],6), ([A, B],4), ([B, D],4), ([A, B, E],3),"
- + " ([A, B, D],3), ([B, C],3))]", frequentPatterns.toString());
+ "[(C,([B, C],3)), "
+ + "(E,([A, E],4), ([A, B, E],3), ([A, D, E],3)), "
+ + "(A,([A],5), ([A, D],4), ([A, E],4), ([A, B],4), ([A, B, E],3), ([A, D, E],3), ([A, B, D],3)), "
+ + "(D,([D],6), ([B, D],4), ([A, D],4), ([A, D, E],3), ([A, B, D],3)), "
+ + "(B,([B],6), ([A, B],4), ([B, D],4), ([A, B, D],3), ([A, B, E],3), ([B, C],3))]",
+ frequentPatterns.toString());
}
}