You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/05/02 20:14:25 UTC
svn commit: r1098706 [2/5] - in /mahout/trunk: core/src/main/java/
core/src/main/java/org/apache/mahout/
core/src/main/java/org/apache/mahout/cf/taste/eval/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/t...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java Mon May 2 18:14:18 2011
@@ -289,7 +289,7 @@ public class GenericItemBasedRecommender
@Override
public double estimate(Long itemID) throws TasteException {
LongPair pair = new LongPair(toItemID, itemID);
- if ((rescorer != null) && rescorer.isFiltered(pair)) {
+ if (rescorer != null && rescorer.isFiltered(pair)) {
return Double.NaN;
}
double originalEstimate = similarity.itemSimilarity(toItemID, itemID);
@@ -335,7 +335,7 @@ public class GenericItemBasedRecommender
for (int i = 0; i < toItemIDs.length; i++) {
long toItemID = toItemIDs[i];
LongPair pair = new LongPair(toItemID, itemID);
- if ((rescorer != null) && rescorer.isFiltered(pair)) {
+ if (rescorer != null && rescorer.isFiltered(pair)) {
continue;
}
double estimate = similarities[i];
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java Mon May 2 18:14:18 2011
@@ -69,8 +69,8 @@ public final class GenericRecommendedIte
if (!(o instanceof GenericRecommendedItem)) {
return false;
}
- GenericRecommendedItem other = (GenericRecommendedItem) o;
- return (itemID == other.getItemID()) && (value == other.getValue());
+ RecommendedItem other = (RecommendedItem) o;
+ return itemID == other.getItemID() && value == other.getValue();
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java Mon May 2 18:14:18 2011
@@ -53,7 +53,7 @@ public final class NearestNeighborCluste
/**
* <p>
* Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link UserSimilarity}. By
- * setting <code>samplingRate</code> to a value less than 1.0, this implementation will only examine that
+ * setting {@code samplingRate} to a value less than 1.0, this implementation will only examine that
* fraction of all user-user similarities between two clusters, increasing performance at the expense of
* accuracy.
* </p>
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java Mon May 2 18:14:18 2011
@@ -49,7 +49,7 @@ public final class SimilarUser implement
return false;
}
SimilarUser other = (SimilarUser) o;
- return (userID == other.getUserID()) && (similarity == other.getSimilarity());
+ return userID == other.getUserID() && similarity == other.getSimilarity();
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java Mon May 2 18:14:18 2011
@@ -59,7 +59,7 @@ public final class TopItems {
double lowestTopValue = Double.NEGATIVE_INFINITY;
while (possibleItemIDs.hasNext()) {
long itemID = possibleItemIDs.next();
- if ((rescorer == null) || !rescorer.isFiltered(itemID)) {
+ if (rescorer == null || !rescorer.isFiltered(itemID)) {
double preference;
try {
preference = estimator.estimate(itemID);
@@ -67,7 +67,7 @@ public final class TopItems {
continue;
}
double rescoredPref = rescorer == null ? preference : rescorer.rescore(itemID, preference);
- if (!Double.isNaN(rescoredPref) && (!full || (rescoredPref > lowestTopValue))) {
+ if (!Double.isNaN(rescoredPref) && (!full || rescoredPref > lowestTopValue)) {
topItems.add(new GenericRecommendedItem(itemID, (float) rescoredPref));
if (full) {
topItems.poll();
@@ -98,7 +98,7 @@ public final class TopItems {
double lowestTopValue = Double.NEGATIVE_INFINITY;
while (allUserIDs.hasNext()) {
long userID = allUserIDs.next();
- if ((rescorer != null) && rescorer.isFiltered(userID)) {
+ if (rescorer != null && rescorer.isFiltered(userID)) {
continue;
}
double similarity;
@@ -108,7 +108,7 @@ public final class TopItems {
continue;
}
double rescoredSimilarity = rescorer == null ? similarity : rescorer.rescore(userID, similarity);
- if (!Double.isNaN(rescoredSimilarity) && (!full || (rescoredSimilarity > lowestTopValue))) {
+ if (!Double.isNaN(rescoredSimilarity) && (!full || rescoredSimilarity > lowestTopValue)) {
topUsers.add(new SimilarUser(userID, similarity));
if (full) {
topUsers.poll();
@@ -153,7 +153,7 @@ public final class TopItems {
while (allSimilarities.hasNext()) {
GenericItemSimilarity.ItemItemSimilarity similarity = allSimilarities.next();
double value = similarity.getValue();
- if (!Double.isNaN(value) && (!full || (value > lowestTopValue))) {
+ if (!Double.isNaN(value) && (!full || value > lowestTopValue)) {
topSimilarities.add(similarity);
if (full) {
topSimilarities.poll();
@@ -185,7 +185,7 @@ public final class TopItems {
while (allSimilarities.hasNext()) {
GenericUserSimilarity.UserUserSimilarity similarity = allSimilarities.next();
double value = similarity.getValue();
- if (!Double.isNaN(value) && (!full || (value > lowestTopValue))) {
+ if (!Double.isNaN(value) && (!full || value > lowestTopValue)) {
topSimilarities.add(similarity);
if (full) {
topSimilarities.poll();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java Mon May 2 18:14:18 2011
@@ -85,7 +85,7 @@ public final class TreeClusteringRecomme
* @param numClusters
* desired number of clusters to create
* @throws IllegalArgumentException
- * if arguments are <code>null</code>, or <code>numClusters</code> is less than 2
+ * if arguments are {@code null}, or {@code numClusters} is less than 2
*/
public TreeClusteringRecommender(DataModel dataModel, ClusterSimilarity clusterSimilarity, int numClusters)
throws TasteException {
@@ -103,7 +103,7 @@ public final class TreeClusteringRecomme
* percentage of all cluster-cluster pairs to consider when finding next-most-similar clusters.
* Decreasing this value from 1.0 can increase performance at the cost of accuracy
* @throws IllegalArgumentException
- * if arguments are <code>null</code>, or <code>numClusters</code> is less than 2, or samplingRate
+ * if arguments are {@code null}, or {@code numClusters} is less than 2, or samplingRate
* is {@link Double#NaN} or nonpositive or greater than 1.0
*/
public TreeClusteringRecommender(DataModel dataModel,
@@ -141,7 +141,7 @@ public final class TreeClusteringRecomme
* clustering similarity threshold; clusters will be aggregated into larger clusters until the next
* two nearest clusters' similarity drops below this threshold
* @throws IllegalArgumentException
- * if arguments are <code>null</code>, or <code>clusteringThreshold</code> is {@link Double#NaN}
+ * if arguments are {@code null}, or {@code clusteringThreshold} is {@link Double#NaN}
*/
public TreeClusteringRecommender(DataModel dataModel,
ClusterSimilarity clusterSimilarity,
@@ -161,7 +161,7 @@ public final class TreeClusteringRecomme
* percentage of all cluster-cluster pairs to consider when finding next-most-similar clusters.
* Decreasing this value from 1.0 can increase performance at the cost of accuracy
* @throws IllegalArgumentException
- * if arguments are <code>null</code>, or <code>clusteringThreshold</code> is {@link Double#NaN},
+ * if arguments are {@code null}, or {@code clusteringThreshold} is {@link Double#NaN},
* or samplingRate is {@link Double#NaN} or nonpositive or greater than 1.0
*/
public TreeClusteringRecommender(DataModel dataModel,
@@ -207,11 +207,11 @@ public final class TreeClusteringRecomme
// And that the rescorer doesn't "reject".
for (RecommendedItem recommendedItem : recommended) {
long itemID = recommendedItem.getItemID();
- if ((rescorer != null) && rescorer.isFiltered(itemID)) {
+ if (rescorer != null && rescorer.isFiltered(itemID)) {
continue;
}
- if ((dataModel.getPreferenceValue(userID, itemID) == null)
- && ((rescorer == null) || !Double.isNaN(rescorer.rescore(itemID, recommendedItem.getValue())))) {
+ if (dataModel.getPreferenceValue(userID, itemID) == null
+ && (rescorer == null || !Double.isNaN(rescorer.rescore(itemID, recommendedItem.getValue())))) {
rescored.add(recommendedItem);
}
}
@@ -324,10 +324,10 @@ public final class TreeClusteringRecomme
for (int i = 0; i < size; i++) {
FastIDSet cluster1 = clusters.get(i);
for (int j = i + 1; j < size; j++) {
- if ((samplingRate >= 1.0) || (RANDOM.nextDouble() < samplingRate)) {
+ if (samplingRate >= 1.0 || RANDOM.nextDouble() < samplingRate) {
FastIDSet cluster2 = clusters.get(j);
double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
- if (!Double.isNaN(similarity) && (similarity > bestSimilarity)) {
+ if (!Double.isNaN(similarity) && similarity > bestSimilarity) {
bestSimilarity = similarity;
nearestPair = new Pair<FastIDSet,FastIDSet>(cluster1, cluster2);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java Mon May 2 18:14:18 2011
@@ -91,7 +91,7 @@ public final class TreeClusteringRecomme
* @param numClusters
* desired number of clusters to create
* @throws IllegalArgumentException
- * if arguments are <code>null</code>, or <code>numClusters</code> is less than 2
+ * if arguments are {@code null}, or {@code numClusters} is less than 2
*/
public TreeClusteringRecommender2(DataModel dataModel, ClusterSimilarity clusterSimilarity, int numClusters)
throws TasteException {
@@ -116,22 +116,22 @@ public final class TreeClusteringRecomme
/**
* @param dataModel
- * {@link org.apache.mahout.cf.taste.model.DataModel} which provides users
+ * {@link DataModel} which provides users
* @param clusterSimilarity
- * {@link org.apache.mahout.cf.taste.impl.recommender.ClusterSimilarity} used to compute cluster
+ * {@link ClusterSimilarity} used to compute cluster
* similarity
* @param clusteringThreshold
* clustering similarity threshold; clusters will be aggregated into larger clusters until the next
* two nearest clusters' similarity drops below this threshold
* @throws IllegalArgumentException
- * if arguments are <code>null</code>, or <code>clusteringThreshold</code> is {@link Double#NaN}
+ * if arguments are {@code null}, or {@code clusteringThreshold} is {@link Double#NaN}
*/
public TreeClusteringRecommender2(DataModel dataModel,
ClusterSimilarity clusterSimilarity,
double clusteringThreshold) throws TasteException {
super(dataModel);
Preconditions.checkArgument(clusterSimilarity != null, "clusterSimilarity is null");
- Preconditions.checkArgument(!(Double.isNaN(clusteringThreshold)), "clusteringThreshold must not be NaN");
+ Preconditions.checkArgument(!Double.isNaN(clusteringThreshold), "clusteringThreshold must not be NaN");
this.clusterSimilarity = clusterSimilarity;
this.numClusters = Integer.MIN_VALUE;
this.clusteringThreshold = clusteringThreshold;
@@ -166,11 +166,11 @@ public final class TreeClusteringRecomme
// And that the rescorer doesn't "reject".
for (RecommendedItem recommendedItem : recommended) {
long itemID = recommendedItem.getItemID();
- if ((rescorer != null) && rescorer.isFiltered(itemID)) {
+ if (rescorer != null && rescorer.isFiltered(itemID)) {
continue;
}
- if ((dataModel.getPreferenceValue(userID, itemID) == null)
- && ((rescorer == null) || !Double.isNaN(rescorer.rescore(itemID, recommendedItem.getValue())))) {
+ if (dataModel.getPreferenceValue(userID, itemID) == null
+ && (rescorer == null || !Double.isNaN(rescorer.rescore(itemID, recommendedItem.getValue())))) {
rescored.add(recommendedItem);
}
}
@@ -308,14 +308,14 @@ public final class TreeClusteringRecomme
while (!queue.isEmpty()) {
- if (!clusteringByThreshold && (clusters.size() <= numClusters)) {
+ if (!clusteringByThreshold && clusters.size() <= numClusters) {
done = true;
break;
}
ClusterClusterPair top = queue.remove(0);
- if (clusteringByThreshold && (top.getSimilarity() < clusteringThreshold)) {
+ if (clusteringByThreshold && top.getSimilarity() < clusteringThreshold) {
done = true;
break;
}
@@ -330,10 +330,10 @@ public final class TreeClusteringRecomme
while (clusterIterator.hasNext() && !(removed1 && removed2)) {
FastIDSet current = clusterIterator.next();
// Yes, use == here
- if (!removed1 && (cluster1 == current)) {
+ if (!removed1 && cluster1 == current) {
clusterIterator.remove();
removed1 = true;
- } else if (!removed2 && (cluster2 == current)) {
+ } else if (!removed2 && cluster2 == current) {
clusterIterator.remove();
removed2 = true;
}
@@ -345,7 +345,7 @@ public final class TreeClusteringRecomme
ClusterClusterPair pair = queueIterator.next();
FastIDSet pair1 = pair.getCluster1();
FastIDSet pair2 = pair.getCluster2();
- if ((pair1 == cluster1) || (pair1 == cluster2) || (pair2 == cluster1) || (pair2 == cluster2)) {
+ if (pair1 == cluster1 || pair1 == cluster2 || pair2 == cluster1 || pair2 == cluster2) {
queueIterator.remove();
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/NonNegativeQuadraticOptimizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/NonNegativeQuadraticOptimizer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/NonNegativeQuadraticOptimizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/NonNegativeQuadraticOptimizer.java Mon May 2 18:14:18 2011
@@ -60,7 +60,7 @@ public final class NonNegativeQuadraticO
// find active variables - those that are pinned due to
// nonnegativity constraints; set respective ri's to zero
- if ((x[n] < EPSILON) && (rn < 0.0)) {
+ if (x[n] < EPSILON && rn < 0.0) {
rn = 0.0;
} else {
// max step size numerator
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java Mon May 2 18:14:18 2011
@@ -66,24 +66,24 @@ public final class MemoryDiffStorage imp
/**
* <p>
- * See {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender} for the meaning of
- * <code>stdDevWeighted</code>. <code>maxEntries</code> controls the maximum number of
+ * See {@link SlopeOneRecommender} for the meaning of
+ * {@code stdDevWeighted}. {@code maxEntries} controls the maximum number of
* item-item average preference differences that will be tracked internally. After the limit is reached, if
* a new item-item pair is observed in the data it will be ignored. This is recommended for large datasets.
- * The first <code>maxEntries</code> item-item pairs observed in the data are tracked. Assuming that item
+ * The first {@code maxEntries} item-item pairs observed in the data are tracked. Assuming that item
* ratings are reasonably distributed among users, this should only ignore item-item pairs that are very
* infrequently co-rated by a user. The intuition is that data on these infrequently co-rated item-item
* pairs is less reliable and should be the first that is ignored. This parameter can be used to limit the
* memory requirements of {@link SlopeOneRecommender}, which otherwise grow as the square of the number of
* items that exist in the {@link DataModel}. Memory requirements can reach gigabytes with only about 10000
* items, so this may be necessary on larger datasets.
- *
+ *
* @param stdDevWeighted
- * see {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}
+ * see {@link SlopeOneRecommender}
* @param maxEntries
* maximum number of item-item average preference differences to track internally
* @throws IllegalArgumentException
- * if <code>maxEntries</code> is not positive or <code>dataModel</code> is null
+ * if {@code maxEntries} is not positive or {@code dataModel} is null
*/
public MemoryDiffStorage(DataModel dataModel,
Weighting stdDevWeighted,
@@ -371,7 +371,7 @@ public final class MemoryDiffStorage imp
// This is a performance-critical block
long itemIDB = userPreferences.getItemID(j);
RunningAverage average = aMap.get(itemIDB);
- if ((average == null) && (averageCount < maxEntries)) {
+ if (average == null && averageCount < maxEntries) {
average = buildRunningAverage();
aMap.put(itemIDB, average);
averageCount++;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java Mon May 2 18:14:18 2011
@@ -76,19 +76,19 @@ public final class SlopeOneRecommender e
* <p>
* Creates a {@link SlopeOneRecommender} based on the given {@link DataModel}.
* </p>
- *
+ *
* <p>
- * If <code>weighted</code> is set, acts as a weighted slope one recommender. This implementation also
+ * If {@code weighted} is set, acts as a weighted slope one recommender. This implementation also
* includes an experimental "standard deviation" weighting which weights item-item ratings diffs with lower
* standard deviation more highly, on the theory that they are more reliable.
* </p>
- *
+ *
* @param weighting
* if {@link Weighting#WEIGHTED}, acts as a weighted slope one recommender
* @param stdDevWeighting
* use optional standard deviation weighting of diffs
* @throws IllegalArgumentException
- * if <code>diffStorage</code> is null, or stdDevWeighted is set when weighted is not set
+ * if {@code diffStorage} is null, or stdDevWeighted is set when weighted is not set
*/
public SlopeOneRecommender(DataModel dataModel,
Weighting weighting,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java Mon May 2 18:14:18 2011
@@ -126,7 +126,7 @@ public final class FileDiffStorage imple
private long processLine(String line, char delimiter, long averageCount) {
- if ((line.length() == 0) || (line.charAt(0) == COMMENT_CHAR)) {
+ if (line.length() == 0 || line.charAt(0) == COMMENT_CHAR) {
return averageCount;
}
@@ -151,7 +151,7 @@ public final class FileDiffStorage imple
averageDiffs.put(itemID1, level1Map);
}
RunningAverage average = level1Map.get(itemID2);
- if ((average == null) && (averageCount < maxEntries)) {
+ if (average == null && averageCount < maxEntries) {
average = new FullRunningAverage();
level1Map.put(itemID2, average);
averageCount++;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java Mon May 2 18:14:18 2011
@@ -28,7 +28,7 @@ import org.apache.mahout.cf.taste.impl.m
* operations that are needed on the preference data in the database. It assumes the database has a schema
* like:
* </p>
- *
+ *
* <table>
* <tr>
* <th>item_id_a</th>
@@ -59,19 +59,19 @@ import org.apache.mahout.cf.taste.impl.m
* <td>1</td>
* </tr>
* </table>
- *
+ *
* <p>
- * <code>item_id_a</code> and <code>item_id_b</code> should have types compatible with the long primitive
- * type. <code>average_diff</code> and <code>standard_deviation</code> must be compatible with
- * <code>float</code> and <code>count</code> must be compatible with <code>int</code>.
+ * {@code item_id_a} and {@code item_id_b} should have types compatible with the long primitive
+ * type. {@code average_diff} and {@code standard_deviation} must be compatible with
+ * {@code float} and {@code count} must be compatible with {@code int}.
* </p>
- *
+ *
* <p>
* The following command sets up a suitable table in MySQL:
* </p>
- *
+ *
* <p>
- *
+ *
* <pre>
* CREATE TABLE taste_slopeone_diffs (
* item_id_a BIGINT NOT NULL,
@@ -84,7 +84,7 @@ import org.apache.mahout.cf.taste.impl.m
* INDEX (item_id_b)
* )
* </pre>
- *
+ *
* </p>
*/
public final class MySQLJDBCDiffStorage extends AbstractJDBCDiffStorage {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java Mon May 2 18:14:18 2011
@@ -40,8 +40,9 @@ import java.util.concurrent.TimeUnit;
/**
* factorizes the rating matrix using "Alternating-Least-Squares with Weighted-λ-Regularization" as described in
- * the paper "Large-scale Collaborative Filtering for the Netflix Prize" available at
- * {@see http://www.hpl.hp.com/personal/Robert_Schreiber/papers/2008%20AAIM%20Netflix/netflix_aaim08(submitted).pdf}
+ * the paper
+ * <a href="http://www.hpl.hp.com/personal/Robert_Schreiber/papers/2008%20AAIM%20Netflix/netflix_aaim08(submitted).pdf">
+ * "Large-scale Collaborative Filtering for the Netflix Prize"</a>
*/
public class ALSWRFactorizer extends AbstractFactorizer {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ExpectationMaximizationSVDFactorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ExpectationMaximizationSVDFactorizer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ExpectationMaximizationSVDFactorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ExpectationMaximizationSVDFactorizer.java Mon May 2 18:14:18 2011
@@ -87,8 +87,8 @@ public final class ExpectationMaximizati
double average = getAveragePreference();
double prefInterval = dataModel.getMaxPreference() - dataModel.getMinPreference();
- defaultValue = Math.sqrt((average - (prefInterval * 0.1)) / numFeatures);
- interval = (prefInterval * 0.1) / numFeatures;
+ defaultValue = Math.sqrt((average - prefInterval * 0.1) / numFeatures);
+ interval = prefInterval * 0.1 / numFeatures;
for (int feature = 0; feature < numFeatures; feature++) {
for (int userIndex = 0; userIndex < dataModel.getNumUsers(); userIndex++) {
@@ -155,7 +155,7 @@ public final class ExpectationMaximizati
double sum = pref.getCache();
sum += leftVectors[i][f] * rightVectors[j][f];
if (trailing) {
- sum += (numFeatures - f - 1) * ((defaultValue + interval) * (defaultValue + interval));
+ sum += (numFeatures - f - 1) * (defaultValue + interval) * (defaultValue + interval);
if (sum > maxPreference) {
sum = maxPreference;
} else if (sum < minPreference) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java Mon May 2 18:14:18 2011
@@ -115,7 +115,7 @@ public final class SVDRecommender extend
refreshHelper.addDependency(factorizer);
}
- protected static PersistenceStrategy getDefaultPersistenceStrategy() {
+ static PersistenceStrategy getDefaultPersistenceStrategy() {
return new NoPersistenceStrategy();
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java Mon May 2 18:14:18 2011
@@ -138,7 +138,7 @@ abstract class AbstractSimilarity extend
int xLength = xPrefs.length();
int yLength = yPrefs.length();
- if ((xLength == 0) || (yLength == 0)) {
+ if (xLength == 0 || yLength == 0) {
return Double.NaN;
}
@@ -268,7 +268,7 @@ abstract class AbstractSimilarity extend
int xLength = xPrefs.length();
int yLength = yPrefs.length();
- if ((xLength == 0) || (yLength == 0)) {
+ if (xLength == 0 || yLength == 0) {
return Double.NaN;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java Mon May 2 18:14:18 2011
@@ -190,11 +190,10 @@ public final class GenericItemSimilarity
/**
* <p>
* Returns the similarity between two items. Note that similarity is assumed to be symmetric, that
- * <code>itemSimilarity(item1, item2) == itemSimilarity(item2, item1)</code>, and that
- * <code>itemSimilarity(item1,
- * item1) == 1.0</code> for all items.
+ * {@code itemSimilarity(item1, item2) == itemSimilarity(item2, item1)}, and that
+ * {@code itemSimilarity(item1, item1) == 1.0} for all items.
* </p>
- *
+ *
* @param itemID1
* first item
* @param itemID2
@@ -298,8 +297,9 @@ public final class GenericItemSimilarity
return false;
}
ItemItemSimilarity otherSimilarity = (ItemItemSimilarity) other;
- return (otherSimilarity.getItemID1() == itemID1) && (otherSimilarity.getItemID2() == itemID2)
- && (otherSimilarity.getValue() == value);
+ return otherSimilarity.getItemID1() == itemID1
+ && otherSimilarity.getItemID2() == itemID2
+ && otherSimilarity.getValue() == value;
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java Mon May 2 18:14:18 2011
@@ -182,8 +182,9 @@ public final class GenericUserSimilarity
return false;
}
UserUserSimilarity otherSimilarity = (UserUserSimilarity) other;
- return (otherSimilarity.getUserID1() == userID1) && (otherSimilarity.getUserID2() == userID2)
- && (otherSimilarity.getValue() == value);
+ return otherSimilarity.getUserID1() == userID1
+ && otherSimilarity.getUserID2() == userID2
+ && otherSimilarity.getValue() == value;
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java Mon May 2 18:14:18 2011
@@ -27,26 +27,26 @@ import com.google.common.base.Preconditi
* <p>
* An implementation of the Pearson correlation. For users X and Y, the following values are calculated:
* </p>
- *
+ *
* <ul>
* <li>sumX2: sum of the square of all X's preference values</li>
* <li>sumY2: sum of the square of all Y's preference values</li>
* <li>sumXY: sum of the product of X and Y's preference value for all items for which both X and Y express a
* preference</li>
* </ul>
- *
+ *
* <p>
* The correlation is then:
- *
+ *
* <p>
- * <code>sumXY / sqrt(sumX2 * sumY2)</code>
+ * {@code sumXY / sqrt(sumX2 * sumY2)}
* </p>
- *
+ *
* <p>
* Note that this correlation "centers" its data, shifts the user's preference values so that each of their
* means is 0. This is necessary to achieve expected behavior on all data sets.
* </p>
- *
+ *
* <p>
* This correlation implementation is equivalent to the cosine similarity since the data it receives
* is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the angle
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java Mon May 2 18:14:18 2011
@@ -52,7 +52,7 @@ public final class SpearmanCorrelationSi
int xLength = xPrefs.length();
int yLength = yPrefs.length();
- if ((xLength <= 1) || (yLength <= 1)) {
+ if (xLength <= 1 || yLength <= 1) {
return Double.NaN;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java Mon May 2 18:14:18 2011
@@ -38,8 +38,8 @@ import com.google.common.base.Preconditi
* </p>
*
* <p>
- * The similarity value is assumed to be parseable as a <code>double</code> having a value between -1 and 1. The
- * item IDs are parsed as <code>long</code>s. Similarities are symmetric so for a pair of items you do not have to
+ * The similarity value is assumed to be parseable as a {@code double} having a value between -1 and 1. The
+ * item IDs are parsed as {@code long}s. Similarities are symmetric so for a pair of items you do not have to
* include 2 lines in the file.
* </p>
*
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java Mon May 2 18:14:18 2011
@@ -54,15 +54,15 @@ public final class CaseAmplification imp
* Transforms one similarity value. This implementation is such that it's possible to define this
* transformation on one value in isolation. The "thing" parameters are therefore unused.
* </p>
- *
+ *
* @param id1
* unused
* @param id2
* unused
* @param value
* similarity to transform
- * @return <code>value<sup>factor</sup></code> if value is nonnegative;
- * <code>-value<sup>-factor</sup></code> otherwise
+ * @return {@code value}<sup>{@code factor}</sup> if value is nonnegative;
+ * {@code -value}<sup>{@code -factor}</sup> otherwise
*/
@Override
public double transformSimilarity(long id1, long id2, double value) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java Mon May 2 18:14:18 2011
@@ -62,13 +62,13 @@ public final class InverseUserFrequency
* <p>
* Creates a transformation. Computations use the given log base.
* </p>
- *
+ *
* @param dataModel
* {@link DataModel} from which to calculate user frequencies
* @param logBase
* calculation logarithm base
* @throws IllegalArgumentException
- * if dataModel is <code>null</code> or logBase is {@link Double#NaN} or <= 1.0
+ * if dataModel is {@code null} or logBase is {@link Double#NaN} or {@code <= 1.0}
*/
public InverseUserFrequency(DataModel dataModel, double logBase) throws TasteException {
Preconditions.checkArgument(dataModel != null, "dataModel is null");
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java Mon May 2 18:14:18 2011
@@ -36,11 +36,11 @@ public interface IDRescorer {
double rescore(long id, double originalScore);
/**
- * Returns <code>true</code> to exclude the given thing.
- *
+ * Returns {@code true} to exclude the given thing.
+ *
* @param id
* ID of thing (user, item, etc.) to rescore
- * @return <code>true</code> to exclude, <code>false</code> otherwise
+ * @return {@code true} to exclude, {@code false} otherwise
*/
boolean isFiltered(long id);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java Mon May 2 18:14:18 2011
@@ -24,9 +24,9 @@ package org.apache.mahout.cf.taste.recom
* the results according to application-specific logic before returning recommendations. For example, an
* application may want to boost the score of items in a certain category just for one request.
* </p>
- *
+ *
* <p>
- * A can also exclude a thing from consideration entirely by returning <code>true</code> from
+ * A {@link Rescorer} can also exclude a thing from consideration entirely by returning {@code true} from
* {@link #isFiltered(Object)}.
* </p>
*/
@@ -42,11 +42,11 @@ public interface Rescorer<T> {
double rescore(T thing, double originalScore);
/**
- * Returns <code>true</code> to exclude the given thing.
- *
+ * Returns {@code true} to exclude the given thing.
+ *
* @param thing
* the thing to filter
- * @return <code>true</code> to exclude, <code>false</code> otherwise
+ * @return {@code true} to exclude, {@code false} otherwise
*/
boolean isFiltered(T thing);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java Mon May 2 18:14:18 2011
@@ -37,7 +37,7 @@ public interface DiffStorage extends Ref
/**
* @return {@link RunningAverage} encapsulating the average difference in preferences between items
- * corresponding to <code>itemID1</code> and <code>itemID2</code>, in that direction; that is, it's
+ * corresponding to {@code itemID1} and {@code itemID2}, in that direction; that is, it's
* the average of item 2's preferences minus item 1's preferences
*/
RunningAverage getDiff(long itemID1, long itemID2) throws TasteException;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java Mon May 2 18:14:18 2011
@@ -58,10 +58,7 @@ public interface ItemSimilarity extends
double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException;
/**
- * <p>return all ids of item similar in no particular order</p>
- * @param itemID
- * @return
- * @throws TasteException
+ * @return all IDs of similar items, in no particular order
*/
long[] allSimilarItemIDs(long itemID) throws TasteException;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/AbstractVectorClassifier.java Mon May 2 18:14:18 2011
@@ -62,7 +62,7 @@ public abstract class AbstractVectorClas
/**
* Classifies a vector in the special case of a binary classifier where
- * <code>classify(Vector)</code> would return a vector with only one element. As such,
+ * {@link #classify(Vector)} would return a vector with only one element. As such,
* using this method can avoid the allocation of a vector.
* @param instance The feature vector to be classified.
* @return The score for category 1.
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java Mon May 2 18:14:18 2011
@@ -181,9 +181,9 @@ public final class TestClassifier {
params.set("alpha_i", alphaI);
params.set("testDirPath", testDirPath);
- if (classificationMethod.equalsIgnoreCase("sequential")) {
+ if ("sequential".equalsIgnoreCase(classificationMethod)) {
classifySequential(params);
- } else if (classificationMethod.equalsIgnoreCase("mapreduce")) {
+ } else if ("mapreduce".equalsIgnoreCase(classificationMethod)) {
classifyParallel(params);
}
} catch (OptionException e) {
@@ -205,12 +205,12 @@ public final class TestClassifier {
Algorithm algorithm;
Datastore datastore;
- if (params.get("dataSource").equals("hdfs")) {
- if (params.get("classifierType").equalsIgnoreCase("bayes")) {
+ if ("hdfs".equals(params.get("dataSource"))) {
+ if ("bayes".equalsIgnoreCase(params.get("classifierType"))) {
log.info("Testing Bayes Classifier");
algorithm = new BayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
- } else if (params.get("classifierType").equalsIgnoreCase("cbayes")) {
+ } else if ("cbayes".equalsIgnoreCase(params.get("classifierType"))) {
log.info("Testing Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java Mon May 2 18:14:18 2011
@@ -89,15 +89,13 @@ public class BayesAlgorithm implements A
@Override
public double featureWeight(Datastore datastore, String label, String feature) throws InvalidDatastoreException {
-
double result = datastore.getWeight("weight", feature, label);
double vocabCount = datastore.getWeight("sumWeight", "vocabCount");
double sumLabelWeight = datastore.getWeight("labelWeight", label);
double numerator = result + datastore.getWeight("params", "alpha_i");
double denominator = sumLabelWeight + vocabCount;
double weight = Math.log(numerator / denominator);
- result = -weight;
- return result;
+ return -weight;
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java Mon May 2 18:14:18 2011
@@ -104,9 +104,7 @@ public class CBayesAlgorithm implements
double weight = Math.log(numerator / denominator);
- result = weight / thetaNormalizer;
-
- return result;
+ return weight / thetaNormalizer;
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java Mon May 2 18:14:18 2011
@@ -52,18 +52,18 @@ public interface Algorithm {
String defaultCategory) throws InvalidDatastoreException;
/**
- * Classify the document and return the top <code>numResults</code>
- *
+ * Classify the document and return the top {@code numResults} results.
+ *
* @param document
* The document to classify
* @param datastore
- * The {@link org.apache.mahout.classifier.bayes.interfaces.Datastore} (InMemory)
+ * The {@link Datastore} (InMemory)
* @param defaultCategory
* The default category to assign
* @param numResults
* The maximum number of results to return, ranked by score. Ties are broken by comparing the
* category
- * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s.
+ * @return An array of {@link ClassifierResult}s.
* @throws InvalidDatastoreException
*/
ClassifierResult[] classifyDocument(String[] document,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java Mon May 2 18:14:18 2011
@@ -27,9 +27,9 @@ import org.apache.mahout.classifier.baye
*/
public interface Datastore {
/**
- * Gets a double value from the Matrix pointed to by the <code>matrixName</code> from its cell pointed to by
- * the <code>row</code> and <code>column</code> string
- *
+ * Gets a double value from the Matrix pointed to by the {@code matrixName} from its cell pointed to by
+ * the {@code row} and {@code column} string
+ *
* @param matrixName
* @param row
* @param column
@@ -39,9 +39,9 @@ public interface Datastore {
double getWeight(String matrixName, String row, String column) throws InvalidDatastoreException;
/**
- * Gets a double value from the Vector pointed to by the <code>vectorName</code> from its cell pointed to by
- * the <code>index</code>
- *
+ * Gets a double value from the Vector pointed to by the {@code vectorName} from its cell pointed to by
+ * the {@code index}
+ *
* @param vectorName
* @param index
* @return double value
@@ -50,8 +50,8 @@ public interface Datastore {
double getWeight(String vectorName, String index) throws InvalidDatastoreException;
/**
- * get the keySet of a given Matrix/Vector as given by <code>name</code>
- *
+ * get the keySet of a given Matrix/Vector as given by {@code name}
+ *
* @param name
* @return Collection of keys of Matrix/Vector
* @throws InvalidDatastoreException
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java Mon May 2 18:14:18 2011
@@ -49,6 +49,7 @@ public class BayesClassifierMapper exten
Mapper<Text,Text,StringTuple,DoubleWritable> {
private static final Logger log = LoggerFactory.getLogger(BayesClassifierMapper.class);
+ private static final DoubleWritable ONE = new DoubleWritable(1.0);
private int gramSize = 1;
@@ -70,25 +71,24 @@ public class BayesClassifierMapper exten
*/
@Override
public void map(Text key, Text value,
- OutputCollector<StringTuple,DoubleWritable> output, Reporter reporter) throws IOException {
- String label = key.toString();
-
+ OutputCollector<StringTuple,DoubleWritable> output,
+ Reporter reporter) throws IOException {
List<String> ngrams = new NGrams(value.toString(), gramSize).generateNGramsWithoutLabel();
try {
ClassifierResult result = classifier.classifyDocument(ngrams.toArray(new String[ngrams.size()]),
defaultCategory);
- String correctLabel = label;
+ String correctLabel = key.toString();
String classifiedLabel = result.getLabel();
StringTuple outputTuple = new StringTuple(BayesConstants.CLASSIFIER_TUPLE);
outputTuple.add(correctLabel);
outputTuple.add(classifiedLabel);
- output.collect(outputTuple, new DoubleWritable(1.0));
+ output.collect(outputTuple, ONE);
} catch (InvalidDatastoreException e) {
- throw new IOException(e.toString());
+ throw new IOException(e);
}
}
@@ -101,12 +101,12 @@ public class BayesClassifierMapper exten
Algorithm algorithm;
Datastore datastore;
- if (params.get("dataSource").equals("hdfs")) {
- if (params.get("classifierType").equalsIgnoreCase("bayes")) {
+ if ("hdfs".equals(params.get("dataSource"))) {
+ if ("bayes".equalsIgnoreCase(params.get("classifierType"))) {
log.info("Testing Bayes Classifier");
algorithm = new BayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
- } else if (params.get("classifierType").equalsIgnoreCase("cbayes")) {
+ } else if ("cbayes".equalsIgnoreCase(params.get("classifierType"))) {
log.info("Testing Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java Mon May 2 18:14:18 2011
@@ -80,7 +80,7 @@ public class BayesFeatureMapper extends
if (gramSize > 1) {
ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(Iterators.forArray(tokens)), gramSize);
do {
- String term = (sf.getAttribute(TermAttribute.class)).term();
+ String term = sf.getAttribute(TermAttribute.class).term();
if (term.length() > 0) {
if (wordList.containsKey(term)) {
wordList.put(term, 1 + wordList.get(term));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfOutputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfOutputFormat.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfOutputFormat.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfOutputFormat.java Mon May 2 18:14:18 2011
@@ -52,7 +52,7 @@ public class BayesTfIdfOutputFormat exte
protected String generateFileNameForKeyValue(WritableComparable<?> k, Writable v, String name) {
StringTuple key = (StringTuple) k;
- if ((key.length() == 1) && key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE)) {
+ if (key.length() == 1 && key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE)) {
return "trainer-vocabCount/" + name;
} else {
return "trainer-tfIdf/" + name;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java Mon May 2 18:14:18 2011
@@ -52,7 +52,7 @@ public class BayesWeightSummerOutputForm
protected String generateFileNameForKeyValue(WritableComparable<?> k, Writable v, String name) {
StringTuple key = (StringTuple) k;
- if ((key.length() == 1) && key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
+ if (key.length() == 1 && key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
return "Sigma_kSigma_j/" + name;
} else {
if (key.stringAt(0).equals(BayesConstants.FEATURE_SUM)) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java Mon May 2 18:14:18 2011
@@ -63,8 +63,8 @@ public class ClassifierContext {
}
/**
- * Classify the document and return the top <code>numResults</code>
- *
+ * Classify the document and return the top {@code numResults} results.
+ *
* @param document
* The document to classify
* @param defaultCategory
@@ -72,7 +72,7 @@ public class ClassifierContext {
* @param numResults
* The maximum number of results to return, ranked by score. Ties are broken by comparing the
* category
- * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s.
+ * @return An array of {@link ClassifierResult}s.
* @throws InvalidDatastoreException
*/
public ClassifierResult[] classifyDocument(String[] document,
Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/package-info.java?rev=1098706&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/package-info.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/package-info.java Mon May 2 18:14:18 2011
@@ -0,0 +1,62 @@
+/**
+ * <h2>Introduction</h2>
+ *
+ * <p>This package provides an implementation of a MapReduce-enabled Naïve Bayes classifier. It
+ * is a very simple classifier that counts the occurrences of words in association with a label which
+ * can then be used to determine the likelihood that a new document, and its words, should be assigned a particular
+ * label.
+ * </p>
+ *
+ * <h2>Implementation</h2>
+ *
+ * <p>The implementation is divided up into three parts:</p>
+ *
+ * <ol>
+ * <li>The Trainer -- responsible for doing the counting of the words and the labels</li>
+ * <li>The Model -- responsible for holding the training data in a useful way</li>
+ * <li>The Classifier -- responsible for using the trainer's output to determine the category of previously unseen
+ * documents</li>
+ * </ol>
+ *
+ * <h3>The Trainer</h3>
+
+ * <p>The trainer is manifested in several classes:</p>
+ *
+ * <ol>
+ * <li>{@link org.apache.mahout.classifier.bayes.BayesDriver} -- Creates the Hadoop Naive Bayes job and outputs
+ * the model. This Driver encapsulates a lot of intermediate Map-Reduce Classes</li>
+ * <li>{@link org.apache.mahout.classifier.bayes.common.BayesFeatureDriver}</li>
+ * <li>{@link org.apache.mahout.classifier.bayes.common.BayesTfIdfDriver}</li>
+ * <li>{@link org.apache.mahout.classifier.bayes.common.BayesWeightSummerDriver}</li>
+ * <li>{@link org.apache.mahout.classifier.bayes.BayesThetaNormalizerDriver}</li>
+ * </ol>
+ *
+ * <p>The trainer assumes that the input files are in the {@link org.apache.hadoop.mapred.KeyValueTextInputFormat},
+ * i.e. the first token of the line is the label and separated from the remaining tokens on the line by a
+ * tab-delimiter. The remaining tokens are the unique features (words). Thus, input documents might look like:</p>
+ *
+ * <pre>
+ * hockey puck stick goalie forward defenseman referee ice checking slapshot helmet
+ * football field football pigskin referee helmet turf tackle
+ * </pre>
+ *
+ * <p>where hockey and football are the labels and the remaining words are the features associated with those
+ * particular labels.</p>
+ *
+ * <p>The output from the trainer is a {@link org.apache.hadoop.io.SequenceFile}.</p>
+ *
+ * <h3>The Model</h3>
+ *
+ * <p>The {@link org.apache.mahout.classifier.bayes.BayesModel} is the data structure used to represent the results of
+ * the training for use by the {@link org.apache.mahout.classifier.bayes.BayesClassifier}.
+ * A Model can be created by hand, or, if using
+ * the {@link org.apache.mahout.classifier.bayes.BayesDriver}, it can be created from the
+ * {@link org.apache.hadoop.io.SequenceFile} that is output. To create it from the SequenceFile, use the
+ * {@link org.apache.mahout.classifier.bayes.io.SequenceFileModelReader} located in the io subpackage.</p>
+ *
+ * <h3>The Classifier</h3>
+ *
+ * <p>The {@link org.apache.mahout.classifier.bayes.BayesClassifier} is responsible for using a
+ * {@link org.apache.mahout.classifier.bayes.BayesModel} to classify documents into categories.</p>
+ */
+package org.apache.mahout.classifier.bayes;
\ No newline at end of file
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java Mon May 2 18:14:18 2011
@@ -73,7 +73,7 @@ public class Auc {
Preconditions.checkArgument(trueValue == 0 || trueValue == 1, "True value must be 0 or 1");
hasScore = true;
- int predictedClass = (score > threshold) ? 1 : 0;
+ int predictedClass = score > threshold ? 1 : 0;
confusion.set(trueValue, predictedClass, confusion.get(trueValue, predictedClass) + 1);
samples++;
@@ -209,7 +209,7 @@ public class Auc {
if (!hasScore) {
// find a constant score that would optimize log-likelihood, but use a dash of Bayesian
// conservatism to avoid dividing by zero or taking log(0)
- double p = (0.5 + confusion.get(1, 1)) / (1 + (confusion.get(0, 0) + confusion.get(1, 1)));
+ double p = (0.5 + confusion.get(1, 1)) / (1 + confusion.get(0, 0) + confusion.get(1, 1));
entropy.set(0, 0, confusion.get(0, 0) * Math.log(1 - p));
entropy.set(0, 1, confusion.get(0, 1) * Math.log(p));
entropy.set(1, 0, confusion.get(1, 0) * Math.log(1 - p));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java Mon May 2 18:14:18 2011
@@ -317,7 +317,7 @@ public class HmmModel implements Cloneab
return -1;
}
Integer tmp = (Integer) hiddenStateNames.get(name);
- return (tmp == null) ? -1 : tmp;
+ return tmp == null ? -1 : tmp;
}
/**
@@ -386,7 +386,7 @@ public class HmmModel implements Cloneab
return -1;
}
Integer tmp = (Integer) outputStateNames.get(name);
- return (tmp == null) ? -1 : tmp;
+ return tmp == null ? -1 : tmp;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java Mon May 2 18:14:18 2011
@@ -55,7 +55,7 @@ public final class HmmTrainer {
int nrOfOutputStates, int[] observedSequence, int[] hiddenSequence,
double pseudoCount) {
// make sure the pseudo count is not zero
- pseudoCount = (pseudoCount == 0) ? Double.MIN_VALUE : pseudoCount;
+ pseudoCount = pseudoCount == 0 ? Double.MIN_VALUE : pseudoCount;
// initialize the parameters
DenseMatrix transitionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfHiddenStates);
@@ -138,7 +138,7 @@ public final class HmmTrainer {
Collection<int[]> observedSequences, double pseudoCount) {
// make sure the pseudo count is not zero
- pseudoCount = (pseudoCount == 0) ? Double.MIN_VALUE : pseudoCount;
+ pseudoCount = pseudoCount == 0 ? Double.MIN_VALUE : pseudoCount;
// initialize parameters
DenseMatrix transitionMatrix = new DenseMatrix(nrOfHiddenStates,
@@ -218,7 +218,7 @@ public final class HmmTrainer {
int maxIterations, boolean scaled) {
// make sure the pseudo count is not zero
- pseudoCount = (pseudoCount == 0) ? Double.MIN_VALUE : pseudoCount;
+ pseudoCount = pseudoCount == 0 ? Double.MIN_VALUE : pseudoCount;
// allocate space for iteration models
HmmModel lastIteration;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java Mon May 2 18:14:18 2011
@@ -241,7 +241,7 @@ public final class HmmUtils {
nextID = model.getHiddenStateID(nextState);
}
// if the ID is -1, use the default value
- encoded[i] = (nextID < 0) ? defaultValue : nextID;
+ encoded[i] = nextID < 0 ? defaultValue : nextID;
}
return encoded;
}
Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java?rev=1098706&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/package-info.java Mon May 2 18:14:18 2011
@@ -0,0 +1,28 @@
+/**
+ * <p>Implements a variety of on-line logistic regression classifiers using SGD-based algorithms.
+ * SGD stands for Stochastic Gradient Descent and refers to a class of learning algorithms
+ * that make it relatively easy to build high speed on-line learning algorithms for a variety
+ * of problems, notably including supervised learning for classification.</p>
+ *
+ * <p>The primary class of interest in this package is
+ * {@link org.apache.mahout.classifier.sgd.CrossFoldLearner} which contains a
+ * number (typically 5) of sub-learners, each of which is given a different portion of the
+ * training data. Each of these sub-learners can then be evaluated on the data it was not
+ * trained on. This allows fully incremental learning while still getting cross-validated
+ * performance estimates.</p>
+ *
+ * <p>The CrossFoldLearner implements {@link org.apache.mahout.classifier.sgd.OnlineLearner}
+ * and thus expects to be fed input in the form
+ * of a target variable and a feature vector. The target variable is simply an integer in the
+ * half-open interval [0..numFeatures) where numFeatures is defined when the CrossFoldLearner
+ * is constructed. The creation of feature vectors is facilitated by the classes that inherit
+ * from {@link org.apache.mahout.classifier.sgd.FeatureVectorEncoder}.
+ * These classes currently implement a form of feature hashing with
+ * multiple probes to limit feature ambiguity.</p>
+ *
+ * @see org.apache.mahout.classifier.sgd.OnlineLogisticRegressionTest
+ * @see org.apache.mahout.classifier.sgd.ContinuousValueEncoderTest
+ * @see org.apache.mahout.classifier.sgd.TextValueEncoderTest
+ * @see org.apache.mahout.classifier.sgd.WordLikeValueEncoderTest
+ */
+package org.apache.mahout.classifier.sgd;
\ No newline at end of file
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java Mon May 2 18:14:18 2011
@@ -295,7 +295,7 @@ public abstract class AbstractCluster im
nzero++;
}
// if vector is sparse or if we have bindings, use sparse notation
- if ((nzero < v.size()) || (bindings != null)) {
+ if (nzero < v.size() || bindings != null) {
buf.append('[');
for (int i = 0; i < v.size(); i++) {
double elem = v.get(i);
@@ -303,7 +303,7 @@ public abstract class AbstractCluster im
continue;
}
String label;
- if ((bindings != null) && ((label = bindings[i]) != null)) {
+ if (bindings != null && (label = bindings[i]) != null) {
buf.append(label).append(':');
} else {
buf.append(i).append(':');
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Mon May 2 18:14:18 2011
@@ -28,7 +28,7 @@ import org.apache.mahout.math.Vector;
*/
public class Canopy extends DistanceMeasureCluster {
- /** Used for deserializaztion as a writable */
+ /** Used for deserialization as a writable */
public Canopy() { }
/**
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java Mon May 2 18:14:18 2011
@@ -163,7 +163,7 @@ public class CanopyClusterer {
log.debug("Added point: {} to canopy: {}", AbstractCluster.formatVector(point, null), canopy.getIdentifier());
canopy.observe(point);
}
- pointStronglyBound = pointStronglyBound || (dist < t2);
+ pointStronglyBound = pointStronglyBound || dist < t2;
}
if (!pointStronglyBound) {
log.debug("Created new Canopy:{} at center:{}", nextCanopyId, AbstractCluster.formatVector(point, null));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Mon May 2 18:14:18 2011
@@ -20,7 +20,6 @@ package org.apache.mahout.clustering.can
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -74,8 +73,7 @@ public class CanopyDriver extends Abstra
addOption(DefaultOptionCreator.clusteringOption().create());
addOption(DefaultOptionCreator.methodOption().create());
- Map<String, String> argMap = parseArguments(args);
- if (argMap == null) {
+ if (parseArguments(args) == null) {
return -1;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java Mon May 2 18:14:18 2011
@@ -26,32 +26,28 @@ import org.apache.hadoop.io.WritableComp
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VectorWritable;
-class CanopyMapper extends
- Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
+class CanopyMapper extends Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
private final Collection<Canopy> canopies = new ArrayList<Canopy>();
private CanopyClusterer canopyClusterer;
@Override
- protected void map(WritableComparable<?> key, VectorWritable point,
- Context context) throws IOException, InterruptedException {
+ protected void map(WritableComparable<?> key, VectorWritable point, Context context)
+ throws IOException, InterruptedException {
canopyClusterer.addPointToCanopies(point.get(), canopies);
}
@Override
- protected void setup(Context context) throws IOException,
- InterruptedException {
+ protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
canopyClusterer = new CanopyClusterer(context.getConfiguration());
}
@Override
- protected void cleanup(Context context) throws IOException,
- InterruptedException {
+ protected void cleanup(Context context) throws IOException, InterruptedException {
for (Canopy canopy : canopies) {
- context.write(new Text("centroid"), new VectorWritable(canopy
- .computeCentroid()));
+ context.write(new Text("centroid"), new VectorWritable(canopy.computeCentroid()));
}
super.cleanup(context);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java Mon May 2 18:14:18 2011
@@ -30,7 +30,7 @@ public class CanopyReducer extends Reduc
private final Collection<Canopy> canopies = new ArrayList<Canopy>();
- protected CanopyClusterer canopyClusterer;
+ private CanopyClusterer canopyClusterer;
@Override
protected void reduce(Text arg0, Iterable<VectorWritable> values,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java Mon May 2 18:14:18 2011
@@ -36,8 +36,7 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
public class ClusterMapper
- extends
- Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable> {
+ extends Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable> {
private CanopyClusterer canopyClusterer;
@@ -58,8 +57,7 @@ public class ClusterMapper
}
@Override
- protected void setup(Context context) throws IOException,
- InterruptedException {
+ protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
canopyClusterer = new CanopyClusterer(context.getConfiguration());
@@ -71,8 +69,7 @@ public class ClusterMapper
if (clustersIn != null && clustersIn.length() > 0) {
Path clusterPath = new Path(clustersIn, "*");
FileSystem fs = clusterPath.getFileSystem(conf);
- Path[] paths = FileUtil.stat2Paths(fs.globStatus(clusterPath, PathFilters
- .partFilter()));
+ Path[] paths = FileUtil.stat2Paths(fs.globStatus(clusterPath, PathFilters.partFilter()));
for (FileStatus file : fs.listStatus(paths, PathFilters.partFilter())) {
for (Canopy value : new SequenceFileValueIterable<Canopy>(file
.getPath(), conf)) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletClusterer.java Mon May 2 18:14:18 2011
@@ -227,7 +227,7 @@ public class DirichletClusterer {
}
// periodically add models to the cluster samples after the burn-in period
- if ((iteration >= burnin) && (iteration % thin == 0)) {
+ if (iteration >= burnin && iteration % thin == 0) {
clusterSamples.add(newModels);
}
// update the state from the new models
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java Mon May 2 18:14:18 2011
@@ -55,7 +55,7 @@ public final class UncommonDistributions
x = k * Math.exp(y);
double z = u * v * v;
double r = b + c * y - x;
- if ((r >= 4.5 * z - cheng) || (r >= Math.log(z))) {
+ if (r >= 4.5 * z - cheng || r >= Math.log(z)) {
accept = true;
}
} while (!accept);
@@ -205,7 +205,7 @@ public final class UncommonDistributions
double total = probabilities.zSum();
int cardinality = probabilities.size();
Vector result = new DenseVector(cardinality);
- for (int i = 0; (total > 0) && (i < cardinality); i++) {
+ for (int i = 0; total > 0 && i < cardinality; i++) {
double p = probabilities.get(i);
int ki = rBinomial(size, p / total);
total -= p;
@@ -216,7 +216,7 @@ public final class UncommonDistributions
}
/**
- * Returns an integer sampled according to this distribution. Takes time proprotional to np + 1. (Reference:
+ * Returns an integer sampled according to this distribution. Takes time proportional to np + 1. (Reference:
* Non-Uniform Random Variate Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Second
* time-waiting algorithm.
*/
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java Mon May 2 18:14:18 2011
@@ -50,7 +50,7 @@ public class FuzzyKMeansClusterMapper
clusterer = new FuzzyKMeansClusterer(conf);
String clusterPath = conf.get(FuzzyKMeansConfigKeys.CLUSTER_PATH_KEY);
- if ((clusterPath != null) && (clusterPath.length() > 0)) {
+ if (clusterPath != null && clusterPath.length() > 0) {
FuzzyKMeansUtil.configureWithClusterInfo(new Path(clusterPath), clusters);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java Mon May 2 18:14:18 2011
@@ -32,7 +32,7 @@ public class FuzzyKMeansCombiner extends
throws IOException, InterruptedException {
SoftCluster cluster = new SoftCluster();
for (ClusterObservations value : values) {
- if (value.getCombinerState() == 0) { // first time thru combiner
+ if (value.getCombinerState() == 0) { // first time through combiner
cluster.observe(value.getS1(), Math.pow(value.getS0(), clusterer.getM()));
} else {
cluster.observe(value);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Mon May 2 18:14:18 2011
@@ -197,7 +197,7 @@ public class FuzzyKMeansDriver extends A
* @param input
* the directory pathname for input points
* @param clustersIn
- * the directory pathname for iniput clusters
+ * the directory pathname for input clusters
* @param clustersOut
* the directory pathname for output clusters
* @param measureClass
@@ -409,7 +409,7 @@ public class FuzzyKMeansDriver extends A
int iteration = 1;
// iterate until the clusters converge
- while (!converged && (iteration <= maxIterations)) {
+ while (!converged && iteration <= maxIterations) {
log.info("Fuzzy K-Means Iteration {}", iteration);
// point the output to a new directory per iteration