You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2011/09/29 11:24:48 UTC
svn commit: r1177237 - in
/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence:
./ measures/
Author: ssc
Date: Thu Sep 29 09:24:47 2011
New Revision: 1177237
URL: http://svn.apache.org/viewvc?rev=1177237&view=rev
Log:
refined size constraints for pruning of similarity candidate pairs
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Thu Sep 29 09:24:47 2011
@@ -279,8 +279,9 @@ public class RowSimilarityJob extends Ab
int numNonZeroEntriesB = numNonZeroEntries.get(occurrenceB.index());
double maxValueA = maxValues.get(occurrenceA.index());
+ double maxValueB = maxValues.get(occurrenceB.index());
- return similarity.consider(numNonZeroEntriesA, numNonZeroEntriesB, maxValueA, threshold);
+ return similarity.consider(numNonZeroEntriesA, numNonZeroEntriesB, maxValueA, maxValueB, threshold);
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java Thu Sep 29 09:24:47 2011
@@ -23,9 +23,4 @@ public class CityBlockSimilarity extends
public double similarity(double dots, double normA, double normB, int numberOfColumns) {
return 1.0 / (1.0 + normA + normB - 2 * dots);
}
-
- @Override
- public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
- return true;
- }
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java Thu Sep 29 09:24:47 2011
@@ -25,7 +25,8 @@ public class CooccurrenceCountSimilarity
}
@Override
- public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
+ public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+ double threshold) {
return numNonZeroEntriesA >= threshold && numNonZeroEntriesB >= threshold;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java Thu Sep 29 09:24:47 2011
@@ -42,7 +42,9 @@ public class CosineSimilarity implements
}
@Override
- public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
- return numNonZeroEntriesB >= threshold / maxValueA;
+ public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+ double threshold) {
+ return numNonZeroEntriesB >= threshold / maxValueA &&
+ numNonZeroEntriesA >= threshold / maxValueB;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java Thu Sep 29 09:24:47 2011
@@ -36,4 +36,9 @@ public abstract class CountbasedMeasure
return 1;
}
+ @Override
+ public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+ double threshold) {
+ return true;
+ }
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java Thu Sep 29 09:24:47 2011
@@ -51,7 +51,8 @@ public class EuclideanDistanceSimilarity
}
@Override
- public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
+ public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+ double threshold) {
return true;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java Thu Sep 29 09:24:47 2011
@@ -29,8 +29,4 @@ public class LoglikelihoodSimilarity ext
return 1.0 - 1.0 / (1.0 + logLikelihood);
}
- @Override
- public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
- return true;
- }
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java Thu Sep 29 09:24:47 2011
@@ -25,7 +25,9 @@ public class TanimotoCoefficientSimilari
}
@Override
- public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
- return numNonZeroEntriesA >= numNonZeroEntriesB * threshold;
+ public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+ double threshold) {
+ return numNonZeroEntriesA >= numNonZeroEntriesB * threshold &&
+ numNonZeroEntriesB >= numNonZeroEntriesA * threshold;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java Thu Sep 29 09:24:47 2011
@@ -27,5 +27,6 @@ public interface VectorSimilarityMeasure
double norm(Vector vector);
double aggregate(double nonZeroValueA, double nonZeroValueB);
double similarity(double summedAggregations, double normA, double normB, int numberOfColumns);
- boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold);
+ boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+ double threshold);
}