You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/07/10 11:35:28 UTC
svn commit: r792856 [5/13] - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/common/
main/java/org/apache/mahout/cf/taste/eval/
main/java/org/apache/mahout/cf/taste/hadoop/
main/java/org/apache/mahout/cf/taste/impl/common/ main/j...
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java Fri Jul 10 09:35:19 2009
@@ -35,17 +35,17 @@
/**
* <p>Implements an "inverse user frequency" transformation, which boosts preference values for items for which few
- * users have expressed a preference, and reduces preference values for items for which many users have expressed
- * a preference. The idea is that these "rare" {@link Item}s are more useful in deciding how similar two users'
- * tastes are, and so should be emphasized in other calculatioons. This idea is mentioned in
- * <a href="ftp://ftp.research.microsoft.com/pub/tr/tr-98-12.pdf">Empirical Analysis of Predictive Algorithms for
+ * users have expressed a preference, and reduces preference values for items for which many users have expressed a
+ * preference. The idea is that these "rare" {@link Item}s are more useful in deciding how similar two users' tastes
+ * are, and so should be emphasized in other calculations. This idea is mentioned in <a
+ * href="ftp://ftp.research.microsoft.com/pub/tr/tr-98-12.pdf">Empirical Analysis of Predictive Algorithms for
* Collaborative Filtering</a>.</p>
*
- * <p>A scaling factor is computed for each {@link Item} by dividing the total number of users by the number of
- * users expressing a preference for that item, and taking the log of that value. The log base of this calculation
- * can be controlled in the constructor. Intuitively, the right value for the base is equal to the average
- * number of users who express a preference for each item in your model. If each item has about 100 preferences
- * on average, 100.0 is a good log base.</p>
+ * <p>A scaling factor is computed for each {@link Item} by dividing the total number of users by the number of users
+ * expressing a preference for that item, and taking the log of that value. The log base of this calculation can be
+ * controlled in the constructor. Intuitively, the right value for the base is equal to the average number of users who
+ * express a preference for each item in your model. If each item has about 100 preferences on average, 100.0 is a good
+ * log base.</p>
*/
public final class InverseUserFrequency implements PreferenceTransform {
@@ -59,7 +59,7 @@
* <p>Creates a {@link InverseUserFrequency} transformation. Computations use the given log base.</p>
*
* @param dataModel {@link DataModel} from which to calculate user frequencies
- * @param logBase calculation logarithm base
+ * @param logBase calculation logarithm base
* @throws IllegalArgumentException if dataModel is <code>null</code> or logBase is {@link Double#NaN} or <= 1.0
*/
public InverseUserFrequency(DataModel dataModel, double logBase) throws TasteException {
@@ -75,9 +75,7 @@
recompute();
}
- /**
- * @return log base used in this object's calculations
- */
+ /** @return log base used in this object's calculations */
public double getLogBase() {
return logBase;
}
@@ -114,7 +112,7 @@
double logFactor = Math.log(logBase);
for (Map.Entry<Item, int[]> entry : itemPreferenceCounts.getEntrySet()) {
newIufFactors.put(entry.getKey(),
- Math.log((double) numUsers / (double) entry.getValue()[0]) / logFactor);
+ Math.log((double) numUsers / (double) entry.getValue()[0]) / logFactor);
}
iufFactors.set(Collections.unmodifiableMap(newIufFactors));
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java Fri Jul 10 09:35:19 2009
@@ -30,15 +30,12 @@
import java.util.Collection;
/**
- * <p>Normalizes preference values for a {@link User} by converting them to
- * <a href="http://mathworld.wolfram.com/z-Score.html">"z-scores"</a>. This process
- * normalizes preference values to adjust for variation in mean and variance of a
- * user's preferences.</p>
+ * <p>Normalizes preference values for a {@link User} by converting them to <a href="http://mathworld.wolfram.com/z-Score.html">"z-scores"</a>.
+ * This process normalizes preference values to adjust for variation in mean and variance of a user's preferences.</p>
*
- * <p>Imagine two users, one who tends to rate every movie he/she sees four or five stars,
- * and another who uses the full one to five star range when assigning ratings. This
- * transform normalizes away the difference in scale used by the two users so that both
- * have a mean preference of 0.0 and a standard deviation of 1.0.</p>
+ * <p>Imagine two users, one who tends to rate every movie he/she sees four or five stars, and another who uses the full
+ * one to five star range when assigning ratings. This transform normalizes away the difference in scale used by the two
+ * users so that both have a mean preference of 0.0 and a standard deviation of 1.0.</p>
*/
public final class ZScore implements PreferenceTransform {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java Fri Jul 10 09:35:19 2009
@@ -23,8 +23,8 @@
import java.util.List;
/**
- * <p>Implementations represent a repository of information about {@link User}s and their
- * associated {@link Preference}s for {@link Item}s.</p>
+ * <p>Implementations represent a repository of information about {@link User}s and their associated {@link Preference}s
+ * for {@link Item}s.</p>
*/
public interface DataModel extends Refreshable {
@@ -38,7 +38,8 @@
* @param id user ID
* @return {@link User} who has that ID
* @throws TasteException if an error occurs while accessing the data
- * @throws org.apache.mahout.cf.taste.common.NoSuchUserException if there is no such {@link User}
+ * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+ * if there is no such {@link User}
*/
User getUser(Object id) throws TasteException;
@@ -52,7 +53,8 @@
* @param id item ID
* @return {@link Item} that has that ID
* @throws TasteException if an error occurs while accessing the data
- * @throws org.apache.mahout.cf.taste.common.NoSuchItemException if there is no such {@link Item}
+ * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
+ * if there is no such {@link Item}
*/
Item getItem(Object id) throws TasteException;
@@ -65,15 +67,14 @@
/**
* @param itemID item ID
- * @return all existing {@link Preference}s expressed for that item, ordered by {@link User},
- * as an array
+ * @return all existing {@link Preference}s expressed for that item, ordered by {@link User}, as an array
* @throws TasteException if an error occurs while accessing the data
*/
Preference[] getPreferencesForItemAsArray(Object itemID) throws TasteException;
/**
- * @return total number of {@link Item}s known to the model. This is generally the union
- * of all {@link Item}s preferred by at least one {@link User} but could include more.
+ * @return total number of {@link Item}s known to the model. This is generally the union of all {@link Item}s
+ * preferred by at least one {@link User} but could include more.
* @throws TasteException if an error occurs while accessing the data
*/
int getNumItems() throws TasteException;
@@ -87,9 +88,9 @@
/**
* @param itemIDs item IDs to check for
* @return the number of users who have expressed a preference for all of the items
- * @throws TasteException if an error occurs while accessing the data
- * @throws IllegalArgumentException if itemIDs is null, empty, or larger than 2 elements
- * since currently only queries of up to 2 items are needed and supported
+ * @throws TasteException if an error occurs while accessing the data
+ * @throws IllegalArgumentException if itemIDs is null, empty, or larger than 2 elements since currently only queries
+ * of up to 2 items are needed and supported
*/
int getNumUsersWithPreferenceFor(Object... itemIDs) throws TasteException;
@@ -98,7 +99,7 @@
*
* @param userID user to set preference for
* @param itemID item to set preference for
- * @param value preference value
+ * @param value preference value
* @throws TasteException if an error occurs while accessing the data
*/
void setPreference(Object userID, Object itemID, double value) throws TasteException;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Item.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Item.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Item.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Item.java Fri Jul 10 09:35:19 2009
@@ -18,21 +18,17 @@
package org.apache.mahout.cf.taste.model;
/**
- * <p>Implementations of this interface represent items that {@link User}s have
- * preferences for, and which can be recommended to them. {@link Item}s must have
- * a unique ID of some kind, and must be {@link Comparable}.</p>
+ * <p>Implementations of this interface represent items that {@link User}s have preferences for, and which can be
+ * recommended to them. {@link Item}s must have a unique ID of some kind, and must be {@link Comparable}.</p>
*/
public interface Item extends Comparable<Item> {
- /**
- * @return unique ID for this item
- */
+ /** @return unique ID for this item */
Object getID();
/**
- * @return true if and only if this {@link Item} can be recommended to a user;
- * for example, this could be false for an {@link Item} that is no longer
- * available but which remains valuable for recommendation
+ * @return true if and only if this {@link Item} can be recommended to a user; for example, this could be false for an
+ * {@link Item} that is no longer available but which remains valuable for recommendation
*/
boolean isRecommendable();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java Fri Jul 10 09:35:19 2009
@@ -26,8 +26,8 @@
DataSource getDataSource();
/**
- * @param assumeExists assume the item exists; don't consult the underlying database. This is a necessary
- * performance enhancement shortcut needed by slope one recommenders
+ * @param assumeExists assume the item exists; don't consult the underlying database. This is a necessary performance
+ * enhancement shortcut needed by slope one recommenders
* @see #getItem(Object)
*/
Item getItem(Object id, boolean assumeExists) throws TasteException;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Preference.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Preference.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Preference.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Preference.java Fri Jul 10 09:35:19 2009
@@ -18,25 +18,20 @@
package org.apache.mahout.cf.taste.model;
/**
- * <p>A {@link Preference} encapsulates an {@link Item} and a preference value, which
- * indicates the strength of the preference for it. {@link Preference}s are associated
- * to {@link User}s.</p>
+ * <p>A {@link Preference} encapsulates an {@link Item} and a preference value, which indicates the strength of the
+ * preference for it. {@link Preference}s are associated with {@link User}s.</p>
*/
public interface Preference {
- /**
- * @return {@link User} who prefers the {@link Item}
- */
+ /** @return {@link User} who prefers the {@link Item} */
User getUser();
- /**
- * @return {@link Item} that is preferred
- */
+ /** @return {@link Item} that is preferred */
Item getItem();
/**
- * @return strength of the preference for that item. Zero should indicate "no preference either way";
- * positive values indicate preference and negative values indicate dislike
+ * @return strength of the preference for that item. Zero should indicate "no preference either way"; positive values
+ * indicate preference and negative values indicate dislike
*/
double getValue();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java Fri Jul 10 09:35:19 2009
@@ -18,8 +18,8 @@
package org.apache.mahout.cf.taste.model;
/**
- * An alternate representation of an array of {@link Preference}. Implementations, in theory,
- * can produce a more memory-efficient representation. This is not used yet.
+ * An alternate representation of an array of {@link Preference}. Implementations, in theory, can produce a more
+ * memory-efficient representation. This is not used yet.
*/
public interface PreferenceArray {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/User.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/User.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/User.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/User.java Fri Jul 10 09:35:19 2009
@@ -17,45 +17,39 @@
package org.apache.mahout.cf.taste.model;
-/**
- * <p>Implementations represent a user, who has preferences for {@link Item}s.</p>
- */
+/** <p>Implementations represent a user, who has preferences for {@link Item}s.</p> */
public interface User extends Comparable<User> {
- /**
- * @return unique user ID
- */
+ /** @return unique user ID */
Object getID();
/**
* @param itemID ID of item to get the user's preference for
- * @return user's {@link Preference} for that {@link Item}, or <code>null</code> if the user expresses
- * no such preference
+ * @return user's {@link Preference} for that {@link Item}, or <code>null</code> if the user expresses no such
+ * preference
*/
Preference getPreferenceFor(Object itemID);
/**
- * Sets a preference that this {@link User} has. Note that in general callers should expect this to
- * be a slow operation, compared to {@link #getPreferenceFor(Object)}.
+ * Sets a preference that this {@link User} has. Note that in general callers should expect this to be a slow
+ * operation, compared to {@link #getPreferenceFor(Object)}.
*/
void setPreference(Item item, double value);
- /**
- * Removes a preference. This method should also be considered potentially slow.
- */
+ /** Removes a preference. This method should also be considered potentially slow. */
void removePreference(Object itemID);
/**
- * <p>Returns a sequence of {@link Preference}s for this {@link User} which can be iterated over.
- * Note that the sequence <em>must</em> be "in order": ordered by {@link Item}.</p>
+ * <p>Returns a sequence of {@link Preference}s for this {@link User} which can be iterated over. Note that the
+ * sequence <em>must</em> be "in order": ordered by {@link Item}.</p>
*
* @return a sequence of {@link Preference}s
*/
Iterable<Preference> getPreferences();
/**
- * <p>Returns an array view of {@link Preference}s for this {@link User}.
- * Note that the sequence <em>must</em> be "in order": ordered by {@link Item}.</p>
+ * <p>Returns an array view of {@link Preference}s for this {@link User}. Note that the sequence <em>must</em> be "in
+ * order": ordered by {@link Item}.</p>
*
* @return an array of {@link Preference}s
*/
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java Fri Jul 10 09:35:19 2009
@@ -24,15 +24,16 @@
import java.util.Collection;
/**
- * <p>Implementations of this interface compute a "neighborhood" of {@link User}s like a
- * given {@link User}. This neighborhood can be used to compute recommendations then.</p>
+ * <p>Implementations of this interface compute a "neighborhood" of {@link User}s like a given {@link User}. This
+ * neighborhood can then be used to compute recommendations.</p>
*/
public interface UserNeighborhood extends Refreshable {
/**
* @param userID ID of user for which a neighborhood will be computed
* @return {@link Collection} of {@link User}s in the neighborhood
- * @throws org.apache.mahout.cf.taste.common.TasteException if an error occurs while accessing data
+ * @throws org.apache.mahout.cf.taste.common.TasteException
+ * if an error occurs while accessing data
*/
Collection<User> getUserNeighborhood(Object userID) throws TasteException;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ClusteringRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ClusteringRecommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ClusteringRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ClusteringRecommender.java Fri Jul 10 09:35:19 2009
@@ -22,14 +22,11 @@
import java.util.Collection;
-/**
- * <p>Interface implemented by "clustering" recommenders.</p>
- */
+/** <p>Interface implemented by "clustering" recommenders.</p> */
public interface ClusteringRecommender extends Recommender {
/**
- * <p>Returns the cluster of users to which the given {@link User}, denoted by user ID,
- * belongs.</p>
+ * <p>Returns the cluster of users to which the given {@link User}, denoted by user ID, belongs.</p>
*
* @param userID user ID for which to find a cluster
* @return {@link Collection} of {@link User}s in the requested user's cluster
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java Fri Jul 10 09:35:19 2009
@@ -23,13 +23,11 @@
import java.util.List;
-/**
- * <p>Interface implemented by "item-based" recommenders.</p>
- */
+/** <p>Interface implemented by "item-based" recommenders.</p> */
public interface ItemBasedRecommender extends Recommender {
/**
- * @param itemID ID of {@link Item} for which to find most similar other {@link Item}s
+ * @param itemID ID of {@link Item} for which to find most similar other {@link Item}s
* @param howMany desired number of most similar {@link Item}s to find
* @return {@link Item}s most similar to the given item, ordered from most similar to least
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
@@ -37,10 +35,10 @@
List<RecommendedItem> mostSimilarItems(Object itemID, int howMany) throws TasteException;
/**
- * @param itemID ID of {@link Item} for which to find most similar other {@link Item}s
- * @param howMany desired number of most similar {@link Item}s to find
- * @param rescorer {@link Rescorer} which can adjust item-item similarity
- * estimates used to determine most similar items
+ * @param itemID ID of {@link Item} for which to find most similar other {@link Item}s
+ * @param howMany desired number of most similar {@link Item}s to find
+ * @param rescorer {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+ * items
* @return {@link Item}s most similar to the given item, ordered from most similar to least
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
*/
@@ -50,18 +48,17 @@
/**
* @param itemIDs IDs of {@link Item} for which to find most similar other {@link Item}s
- * @param howMany desired number of most similar {@link Item}s to find
- * estimates used to determine most similar items
+ * @param howMany desired number of most similar {@link Item}s to find
* @return {@link Item}s most similar to the given items, ordered from most similar to least
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
*/
List<RecommendedItem> mostSimilarItems(List<Object> itemIDs, int howMany) throws TasteException;
/**
- * @param itemIDs IDs of {@link Item} for which to find most similar other {@link Item}s
- * @param howMany desired number of most similar {@link Item}s to find
- * @param rescorer {@link Rescorer} which can adjust item-item similarity
- * estimates used to determine most similar items
+ * @param itemIDs IDs of {@link Item} for which to find most similar other {@link Item}s
+ * @param howMany desired number of most similar {@link Item}s to find
+ * @param rescorer {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+ * items
* @return {@link Item}s most similar to the given items, ordered from most similar to least
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
*/
@@ -70,21 +67,20 @@
Rescorer<Pair<Item, Item>> rescorer) throws TasteException;
/**
- * <p>Lists the {@link Item}s that were most influential in recommending a given item to a given user.
- * Exactly how this is determined is left to the implementation, but, generally this will return items
- * that the user prefers and that are similar to the given item.</p>
+ * <p>Lists the {@link Item}s that were most influential in recommending a given item to a given user. Exactly how
+ * this is determined is left to the implementation, but, generally this will return items that the user prefers and
+ * that are similar to the given item.</p>
*
- * <p>This returns a {@link List} of {@link RecommendedItem} which is a little misleading since it's
- * returning recommend<strong>ing</strong> items, but, I thought it more natural to just reuse this
- * class since it encapsulates an {@link Item} and value. The value here does not necessarily have
- * a consistent interpretation or expected range; it will be higher the more influential the {@link Item}
- * was in the recommendation.</p>
+ * <p>This returns a {@link List} of {@link RecommendedItem} which is a little misleading since it's returning
+ * recommend<strong>ing</strong> items, but, I thought it more natural to just reuse this class since it encapsulates
+ * an {@link Item} and value. The value here does not necessarily have a consistent interpretation or expected range;
+ * it will be higher the more influential the {@link Item} was in the recommendation.</p>
*
- * @param userID ID of {@link org.apache.mahout.cf.taste.model.User} who was recommended the {@link Item}
- * @param itemID ID of {@link Item} that was recommended
+ * @param userID ID of {@link org.apache.mahout.cf.taste.model.User} who was recommended the {@link Item}
+ * @param itemID ID of {@link Item} that was recommended
* @param howMany maximum number of {@link Item}s to return
- * @return {@link List} of {@link RecommendedItem}, ordered from most influential in recommended the given
- * {@link Item} to least
+ * @return {@link List} of {@link RecommendedItem}, ordered from most influential in recommending the given {@link
+ * Item} to least
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
*/
List<RecommendedItem> recommendedBecause(Object userID, Object itemID, int howMany) throws TasteException;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java Fri Jul 10 09:35:19 2009
@@ -20,21 +20,17 @@
import org.apache.mahout.cf.taste.model.Item;
/**
- * <p>Implementations encapsulate items that are recommended, and include
- * the {@link org.apache.mahout.cf.taste.model.Item} recommended and a value expressing
- * the strength of the preference.</p>
+ * <p>Implementations encapsulate items that are recommended, and include the {@link
+ * org.apache.mahout.cf.taste.model.Item} recommended and a value expressing the strength of the preference.</p>
*/
public interface RecommendedItem extends Comparable<RecommendedItem> {
- /**
- * @return the recommended {@link Item}
- */
+ /** @return the recommended {@link Item} */
Item getItem();
/**
- * <p>A value expressing the strength of the preference for the recommended
- * {@link Item}. The range of the values depends on the implementation.
- * Implementations must use larger values to express stronger preference.</p>
+ * <p>A value expressing the strength of the preference for the recommended {@link Item}. The range of the values
+ * depends on the implementation. Implementations must use larger values to express stronger preference.</p>
*
* @return strength of the preference
*/
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java Fri Jul 10 09:35:19 2009
@@ -25,27 +25,24 @@
import java.util.List;
/**
- * <p>Implementations of this interface can recommend {@link Item}s for a
- * {@link org.apache.mahout.cf.taste.model.User}. Implementations will likely take advantage of several
- * classes in other packages here to compute this.</p>
+ * <p>Implementations of this interface can recommend {@link Item}s for a {@link org.apache.mahout.cf.taste.model.User}.
+ * Implementations will likely take advantage of several classes in other packages here to compute this.</p>
*/
public interface Recommender extends Refreshable {
/**
- * @param userID user for which recommendations are to be computed
+ * @param userID user for which recommendations are to be computed
* @param howMany desired number of recommendations
- * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly
- * recommend to least
+ * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to least
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
*/
List<RecommendedItem> recommend(Object userID, int howMany) throws TasteException;
/**
- * @param userID user for which recommendations are to be computed
- * @param howMany desired number of recommendations
+ * @param userID user for which recommendations are to be computed
+ * @param howMany desired number of recommendations
* @param rescorer rescoring function to apply before final list of recommendations is determined
- * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly
- * recommend to least
+ * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to least
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
*/
List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer) throws TasteException;
@@ -53,9 +50,8 @@
/**
* @param userID user ID whose preference is to be estimated
* @param itemID item ID to estimate preference for
- * @return an estimated preference if the user has not expressed a preference for the item, or else
- * the user's actual preference for the item. If a preference cannot be estimated, returns
- * {@link Double#NaN}
+ * @return an estimated preference if the user has not expressed a preference for the item, or else the user's actual
+ * preference for the item. If a preference cannot be estimated, returns {@link Double#NaN}
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
*/
double estimatePreference(Object userID, Object itemID) throws TasteException;
@@ -63,7 +59,7 @@
/**
* @param userID user to set preference for
* @param itemID item to set preference for
- * @param value preference value
+ * @param value preference value
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
*/
void setPreference(Object userID, Object itemID, double value) throws TasteException;
@@ -75,9 +71,7 @@
*/
void removePreference(Object userID, Object itemID) throws TasteException;
- /**
- * @return {@link DataModel} used by this {@link Recommender}
- */
+ /** @return {@link DataModel} used by this {@link Recommender} */
DataModel getDataModel();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java Fri Jul 10 09:35:19 2009
@@ -18,20 +18,20 @@
package org.apache.mahout.cf.taste.recommender;
/**
- * <p>A {@link Rescorer} simply assigns a new "score" to a thing like an
- * {@link org.apache.mahout.cf.taste.model.Item} or {@link org.apache.mahout.cf.taste.model.User} which a {@link Recommender}
- * is considering returning as a top recommendation. It may be used to arbitrarily re-rank the results
- * according to application-specific logic before returning recommendations. For example, an application
- * may want to boost the score of items in a certain category just for one request.</p>
+ * <p>A {@link Rescorer} simply assigns a new "score" to a thing like an {@link org.apache.mahout.cf.taste.model.Item}
+ * or {@link org.apache.mahout.cf.taste.model.User} which a {@link Recommender} is considering returning as a top
+ * recommendation. It may be used to arbitrarily re-rank the results according to application-specific logic before
+ * returning recommendations. For example, an application may want to boost the score of items in a certain category
+ * just for one request.</p>
*
- * <p>A {@link Rescorer} can also exclude a thing from consideration entirely by returning <code>true</code>
- * from {@link #isFiltered(Object)}.</p>
+ * <p>A {@link Rescorer} can also exclude a thing from consideration entirely by returning <code>true</code> from {@link
+ * #isFiltered(Object)}.</p>
*/
public interface Rescorer<T> {
/**
- * @param thing thing ({@link org.apache.mahout.cf.taste.model.Item} or
- * {@link org.apache.mahout.cf.taste.model.User} really) to rescore
+ * @param thing thing ({@link org.apache.mahout.cf.taste.model.Item} or {@link org.apache.mahout.cf.taste.model.User}
+ * really) to rescore
* @param originalScore original score
* @return modified score, or {@link Double#NaN} to indicate that this should be excluded entirely
*/
@@ -39,6 +39,7 @@
/**
* Returns <code>true</code> to exclude the given thing.
+ *
* @param thing the thing to filter
* @return <code>true</code> to exclude, <code>false</code> otherwise
*/
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java Fri Jul 10 09:35:19 2009
@@ -22,13 +22,11 @@
import java.util.List;
-/**
- * <p>Interface implemented by "user-based" recommenders.</p>
- */
+/** <p>Interface implemented by "user-based" recommenders.</p> */
public interface UserBasedRecommender extends Recommender {
/**
- * @param userID ID of {@link User} for which to find most similar other {@link User}s
+ * @param userID ID of {@link User} for which to find most similar other {@link User}s
* @param howMany desired number of most similar {@link User}s to find
* @return {@link User}s most similar to the given user
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
@@ -36,10 +34,10 @@
List<User> mostSimilarUsers(Object userID, int howMany) throws TasteException;
/**
- * @param userID ID of {@link User} for which to find most similar other {@link User}s
- * @param howMany desired number of most similar {@link User}s to find
- * @param rescorer {@link Rescorer} which can adjust user-user similarity
- * estimates used to determine most similar users
+ * @param userID ID of {@link User} for which to find most similar other {@link User}s
+ * @param howMany desired number of most similar {@link User}s to find
+ * @param rescorer {@link Rescorer} which can adjust user-user similarity estimates used to determine most similar
+ * users
* @return {@link User}s most similar to the given user
* @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
*/
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java Fri Jul 10 09:35:19 2009
@@ -27,57 +27,44 @@
import java.util.Set;
/**
- * <p>Implementations store item-item preference diffs for a
- * {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}.
- * It actually does a bit more for this implementation, like listing all items that may be
- * considered for recommedation, in order to maximize what implementations can do
- * to optimize the slope-one algorithm.</p>
+ * <p>Implementations store item-item preference diffs for a {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}.
+ * It actually does a bit more for this implementation, like listing all items that may be considered for recommendation,
+ * in order to maximize what implementations can do to optimize the slope-one algorithm.</p>
*
* @see org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender
*/
public interface DiffStorage extends Refreshable {
/**
- * @param itemID1
- * @param itemID2
- * @return {@link RunningAverage} encapsulating the average difference in preferences
- * between items corresponding to <code>itemID1</code> and <code>itemID2</code>, in that direction; that is,
- * it's the average of item 2's preferences minus item 1's preferences
- * @throws TasteException
+ * @return {@link RunningAverage} encapsulating the average difference in preferences between items corresponding to
+ * <code>itemID1</code> and <code>itemID2</code>, in that direction; that is, it's the average of item 2's
+ * preferences minus item 1's preferences
*/
RunningAverage getDiff(Object itemID1, Object itemID2) throws TasteException;
/**
* @param userID user ID to get diffs for
* @param itemID itemID to assess
- * @param prefs user's preferendces
+ * @param prefs user's preferences
* @return {@link List} of {@link RunningAverage} for that user's item-item diffs
- * @throws TasteException
*/
RunningAverage[] getDiffs(Object userID, Object itemID, Preference[] prefs) throws TasteException;
- /**
- * @param itemID
- * @return {@link RunningAverage} encapsulating the average preference for the given item
- * @throws TasteException
- */
+ /** @return {@link RunningAverage} encapsulating the average preference for the given item */
RunningAverage getAverageItemPref(Object itemID) throws TasteException;
/**
* <p>Updates internal data structures to reflect an update in a preference value for an item.</p>
*
- * @param itemID item to update preference value for
+ * @param itemID item to update preference value for
* @param prefDelta amount by which preference value changed (or its old value, if being removed)
- * @param remove if <code>true</code>, operation reflects a removal rather than change of preference
- * @throws TasteException
+ * @param remove if <code>true</code>, operation reflects a removal rather than change of preference
*/
void updateItemPref(Object itemID, double prefDelta, boolean remove) throws TasteException;
/**
- * @param userID
- * @return {@link Item}s that may possibly be recommended to the given user, which may not be all
- * {@link Item}s since the item-item diff matrix may be sparses
- * @throws TasteException
+ * @return {@link Item}s that may possibly be recommended to the given user, which may not be all {@link Item}s since
+ * the item-item diff matrix may be sparse
*/
Set<Item> getRecommendableItems(Object userID) throws TasteException;
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java Fri Jul 10 09:35:19 2009
@@ -22,17 +22,16 @@
import org.apache.mahout.cf.taste.model.Item;
/**
- * <p>Implementations of this interface define a notion of similarity between two
- * {@link Item}s. Implementations should return values in the range -1.0 to 1.0, with
- * 1.0 representing perfect similarity.</p>
+ * <p>Implementations of this interface define a notion of similarity between two {@link Item}s. Implementations should
+ * return values in the range -1.0 to 1.0, with 1.0 representing perfect similarity.</p>
*
* @see UserSimilarity
*/
public interface ItemSimilarity extends Refreshable {
/**
- * <p>Returns the degree of similarity, of two {@link Item}s, based
- * on the preferences that {@link org.apache.mahout.cf.taste.model.User}s have expressed for the items.</p>
+ * <p>Returns the degree of similarity, of two {@link Item}s, based on the preferences that {@link
+ * org.apache.mahout.cf.taste.model.User}s have expressed for the items.</p>
*
* @param item1 first item
* @param item2 second item
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java Fri Jul 10 09:35:19 2009
@@ -23,9 +23,9 @@
import org.apache.mahout.cf.taste.model.User;
/**
- * <p>Implementations of this interface compute an inferred preference for a {@link User} and an {@link Item}
- * that the user has not expressed any preference for. This might be an average of other preferences scores
- * from that user, for example. This technique is sometimes called "default voting".</p>
+ * <p>Implementations of this interface compute an inferred preference for a {@link User} and an {@link Item} that the
+ * user has not expressed any preference for. This might be an average of other preferences scores from that user, for
+ * example. This technique is sometimes called "default voting".</p>
*/
public interface PreferenceInferrer extends Refreshable {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java Fri Jul 10 09:35:19 2009
@@ -22,17 +22,15 @@
import org.apache.mahout.cf.taste.model.User;
/**
- * <p>Implementations of this interface define a notion of similarity between two
- * {@link User}s. Implementations should return values in the range -1.0 to 1.0, with
- * 1.0 representing perfect similarity.</p>
+ * <p>Implementations of this interface define a notion of similarity between two {@link User}s. Implementations should
+ * return values in the range -1.0 to 1.0, with 1.0 representing perfect similarity.</p>
*
* @see ItemSimilarity
*/
public interface UserSimilarity extends Refreshable {
/**
- * <p>Returns the degree of similarity, of two {@link User}s, based
- * on the their preferences.</p>
+ * <p>Returns the degree of similarity, of two {@link User}s, based on their preferences.</p>
*
* @param user1 first user
* @param user2 second user
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java Fri Jul 10 09:35:19 2009
@@ -22,10 +22,10 @@
import org.apache.mahout.cf.taste.model.Preference;
/**
- * <p>Implementations encapsulate a transform on a {@link Preference}'s value. These transformations are
- * typically applied to values before they are used to compute a similarity value. They are typically not
- * applied elsewhere; in particular {@link org.apache.mahout.cf.taste.model.DataModel}s no longer use a transform
- * like this to transform all of their preference values at the source.</p>
+ * <p>Implementations encapsulate a transform on a {@link Preference}'s value. These transformations are typically
+ * applied to values before they are used to compute a similarity value. They are typically not applied elsewhere; in
+ * particular {@link org.apache.mahout.cf.taste.model.DataModel}s no longer use a transform like this to transform all
+ * of their preference values at the source.</p>
*/
public interface PreferenceTransform extends Refreshable {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java Fri Jul 10 09:35:19 2009
@@ -20,16 +20,14 @@
import org.apache.mahout.cf.taste.common.Refreshable;
/**
- * <p>Implementations encapsulate some transformation on similarity values between two
- * things, where things might be {@link org.apache.mahout.cf.taste.model.User}s or
- * {@link org.apache.mahout.cf.taste.model.Item}s or
- * something else.</p>
+ * <p>Implementations encapsulate some transformation on similarity values between two things, where things might be
+ * {@link org.apache.mahout.cf.taste.model.User}s or {@link org.apache.mahout.cf.taste.model.Item}s or something
+ * else.</p>
*/
public interface SimilarityTransform<T> extends Refreshable {
/**
- * @param value original similarity between thing1 and thing2
- * (should be in [-1,1])
+ * @param value original similarity between thing1 and thing2 (should be in [-1,1])
* @return transformed similarity (should be in [-1,1])
*/
double transformSimilarity(T thing1, T thing2, double value);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Fri Jul 10 09:35:19 2009
@@ -17,13 +17,13 @@
package org.apache.mahout.classifier;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
@@ -32,6 +32,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
@@ -41,15 +42,13 @@
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
-import java.io.Closeable;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
/**
- * Flatten a file into format that can be read by the Bayes M/R job. <p/> One
- * document per line, first token is the label followed by a tab, rest of the
- * line are the terms.
+ * Flatten a file into format that can be read by the Bayes M/R job. <p/> One document per line, first token is the
+ * label followed by a tab, rest of the line are the terms.
*/
public class BayesFileFormatter {
@@ -61,18 +60,16 @@
}
/**
- * Collapse all the files in the inputDir into a single file in the proper
- * Bayes format, 1 document per line
- *
- * @param label The label
- * @param analyzer The analyzer to use
- * @param inputDir The input Directory
- * @param charset The charset of the input files
+ * Collapse all the files in the inputDir into a single file in the proper Bayes format, 1 document per line
+ *
+ * @param label The label
+ * @param analyzer The analyzer to use
+ * @param inputDir The input Directory
+ * @param charset The charset of the input files
* @param outputFile The file to collapse to
- * @throws java.io.IOException
*/
public static void collapse(String label, Analyzer analyzer, File inputDir,
- Charset charset, File outputFile) throws IOException {
+ Charset charset, File outputFile) throws IOException {
Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile),
charset);
try {
@@ -85,17 +82,15 @@
/**
* Write the input files to the outdir, one output file per input file
- *
- * @param label The label of the file
+ *
+ * @param label The label of the file
* @param analyzer The analyzer to use
- * @param input The input file or directory. May not be null
- * @param charset The Character set of the input files
- * @param outDir The output directory. Files will be written there with the
- * same name as the input file
- * @throws IOException
+ * @param input The input file or directory. May not be null
+ * @param charset The Character set of the input files
+ * @param outDir The output directory. Files will be written there with the same name as the input file
*/
public static void format(String label, Analyzer analyzer, File input,
- Charset charset, File outDir) throws IOException {
+ Charset charset, File outDir) throws IOException {
if (input.isDirectory()) {
input.listFiles(new FileProcessor(label, analyzer, charset, outDir));
} else {
@@ -110,8 +105,8 @@
}
/**
- * Hack the FileFilter mechanism so that we don't get stuck on large
- * directories and don't have to loop the list twice
+ * Hack the FileFilter mechanism so that we don't get stuck on large directories and don't have to loop the list
+ * twice
*/
private static class FileProcessor implements FileFilter {
private final String label;
@@ -126,14 +121,12 @@
/**
* Use this when you want to collapse all files to a single file
- *
- * @param label The label
- * @param analyzer
- * @param charset
+ *
+ * @param label The label
* @param writer must not be null and will not be closed
*/
private FileProcessor(String label, Analyzer analyzer, Charset charset,
- Writer writer) {
+ Writer writer) {
this.label = label;
this.analyzer = analyzer;
this.charset = charset;
@@ -142,14 +135,11 @@
/**
* Use this when you want a writer per file
- *
- * @param label
- * @param analyzer
- * @param charset
+ *
* @param outputDir must not be null.
*/
private FileProcessor(String label, Analyzer analyzer, Charset charset,
- File outputDir) {
+ File outputDir) {
this.label = label;
this.analyzer = analyzer;
this.charset = charset;
@@ -189,16 +179,16 @@
/**
* Write the tokens and the label from the Reader to the writer
- *
- * @param label The label
+ *
+ * @param label The label
* @param analyzer The analyzer to use
- * @param inFile the file to read and whose contents are passed to the analyzer
- * @param charset character encoding to assume when reading the input file
- * @param writer The Writer, is not closed by this method
+ * @param inFile the file to read and whose contents are passed to the analyzer
+ * @param charset character encoding to assume when reading the input file
+ * @param writer The Writer, is not closed by this method
* @throws java.io.IOException if there was a problem w/ the reader
*/
private static void writeFile(String label, Analyzer analyzer, File inFile,
- Charset charset, Writer writer) throws IOException {
+ Charset charset, Writer writer) throws IOException {
Reader reader = new InputStreamReader(new FileInputStream(inFile), charset);
try {
TokenStream ts = analyzer.tokenStream(label, reader);
@@ -229,11 +219,10 @@
/**
* Convert a Reader to a vector
- *
+ *
* @param analyzer The Analyzer to use
- * @param reader The reader to feed to the Analyzer
+ * @param reader The reader to feed to the Analyzer
* @return An array of unique tokens
- * @throws IOException
*/
public static String[] readerToDocument(Analyzer analyzer, Reader reader)
throws IOException {
@@ -252,45 +241,45 @@
/**
* Run the FileFormatter
- *
+ *
* @param args The input args. Run with -h to see the help
* @throws ClassNotFoundException if the Analyzer can't be found
* @throws IllegalAccessException if the Analyzer can't be constructed
* @throws InstantiationException if the Analyzer can't be constructed
- * @throws IOException if the files can't be dealt with properly
+ * @throws IOException if the files can't be dealt with properly
*/
- public static void main(String[] args) throws ClassNotFoundException,
+ public static void main(String[] args) throws ClassNotFoundException,
IllegalAccessException, InstantiationException, IOException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
- withDescription("The Input file").withShortName("i").create();
+ abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
+ withDescription("The Input file").withShortName("i").create();
Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
- withDescription("The output file").withShortName("o").create();
+ abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+ withDescription("The output file").withShortName("o").create();
Option labelOpt = obuilder.withLongName("label").withRequired(true).withArgument(
- abuilder.withName("label").withMinimum(1).withMaximum(1).create()).
- withDescription("The label of the file").withShortName("l").create();
+ abuilder.withName("label").withMinimum(1).withMaximum(1).create()).
+ withDescription("The label of the file").withShortName("l").create();
Option analyzerOpt = obuilder.withLongName("analyzer").withArgument(
- abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).
- withDescription("The fully qualified class name of the analyzer to use. Must have a no-arg constructor. Default is the StandardAnalyzer").withShortName("a").create();
+ abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).
+ withDescription("The fully qualified class name of the analyzer to use. Must have a no-arg constructor. Default is the StandardAnalyzer").withShortName("a").create();
Option charsetOpt = obuilder.withLongName("charset").withArgument(
- abuilder.withName("charset").withMinimum(1).withMaximum(1).create()).
- withDescription("The character encoding of the input file").withShortName("c").create();
+ abuilder.withName("charset").withMinimum(1).withMaximum(1).create()).
+ withDescription("The character encoding of the input file").withShortName("c").create();
Option collapseOpt = obuilder.withLongName("collapse").withRequired(true).withArgument(
- abuilder.withName("collapse").withMinimum(1).withMaximum(1).create()).
- withDescription("Collapse a whole directory to a single file, one doc per line").withShortName("p").create();
+ abuilder.withName("collapse").withMinimum(1).withMaximum(1).create()).
+ withDescription("Collapse a whole directory to a single file, one doc per line").withShortName("p").create();
Option helpOpt = obuilder.withLongName("help").withRequired(true).
- withDescription("Print out help").withShortName("h").create();
+ withDescription("Print out help").withShortName("h").create();
Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(labelOpt).withOption(analyzerOpt).withOption(charsetOpt).withOption(collapseOpt).withOption(helpOpt).create();
try {
Parser parser = new Parser();
@@ -298,7 +287,7 @@
CommandLine cmdLine = parser.parse(args);
if (cmdLine.hasOption(helpOpt)) {
-
+
return;
}
File input = new File((String) cmdLine.getValue(inputOpt));
@@ -307,7 +296,7 @@
Analyzer analyzer;
if (cmdLine.hasOption(analyzerOpt)) {
analyzer = Class.forName(
- (String) cmdLine.getValue(analyzerOpt)).asSubclass(Analyzer.class).newInstance();
+ (String) cmdLine.getValue(analyzerOpt)).asSubclass(Analyzer.class).newInstance();
} else {
analyzer = new StandardAnalyzer();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java Fri Jul 10 09:35:19 2009
@@ -17,9 +17,7 @@
package org.apache.mahout.classifier;
-/**
- * Result of a Document Classification. The label and the associated score(Usually probabilty)
- */
+/** Result of a Document Classification. The label and the associated score (usually probability) */
public class ClassifierResult {
private String label;
private double score;
@@ -38,15 +36,15 @@
public String getLabel() {
return label;
- }
+ }
public double getScore() {
return score;
}
-
+
public void setLabel(String label) {
this.label = label;
- }
+ }
public void setScore(double score) {
this.score = score;
@@ -55,8 +53,8 @@
@Override
public String toString() {
return "ClassifierResult{" +
- "category='" + label + '\'' +
- ", score=" + score +
- '}';
+ "category='" + label + '\'' +
+ ", score=" + score +
+ '}';
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Fri Jul 10 09:35:19 2009
@@ -18,14 +18,13 @@
package org.apache.mahout.classifier;
import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
-
+import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
@@ -42,12 +41,12 @@
import java.io.File;
import java.io.FileInputStream;
-import java.io.InputStreamReader;
import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.nio.charset.Charset;
public class Classify {
@@ -57,37 +56,37 @@
}
public static void main(String[] args)
- throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, OptionException {
+ throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, OptionException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
Option pathOpt = obuilder.withLongName("path").withRequired(true).withArgument(
- abuilder.withName("path").withMinimum(1).withMaximum(1).create()).withDescription("The local file system path").withShortName("p").create();
+ abuilder.withName("path").withMinimum(1).withMaximum(1).create()).withDescription("The local file system path").withShortName("p").create();
Option classifyOpt = obuilder.withLongName("classify").withRequired(true).withArgument(
- abuilder.withName("classify").withMinimum(1).withMaximum(1).create()).
- withDescription("The doc to classify").withShortName("").create();
+ abuilder.withName("classify").withMinimum(1).withMaximum(1).create()).
+ withDescription("The doc to classify").withShortName("").create();
Option encodingOpt = obuilder.withLongName("encoding").withRequired(true).withArgument(
- abuilder.withName("encoding").withMinimum(1).withMaximum(1).create()).
- withDescription("The file encoding. Default: UTF-8").withShortName("e").create();
+ abuilder.withName("encoding").withMinimum(1).withMaximum(1).create()).
+ withDescription("The file encoding. Default: UTF-8").withShortName("e").create();
Option analyzerOpt = obuilder.withLongName("analyzer").withRequired(true).withArgument(
- abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).
- withDescription("The Analyzer to use").withShortName("a").create();
+ abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).
+ withDescription("The Analyzer to use").withShortName("a").create();
Option defaultCatOpt = obuilder.withLongName("defaultCat").withRequired(true).withArgument(
- abuilder.withName("defaultCat").withMinimum(1).withMaximum(1).create()).
- withDescription("The default category").withShortName("d").create();
+ abuilder.withName("defaultCat").withMinimum(1).withMaximum(1).create()).
+ withDescription("The default category").withShortName("d").create();
Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(true).withArgument(
- abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()).
- withDescription("Size of the n-gram").withShortName("ng").create();
+ abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()).
+ withDescription("Size of the n-gram").withShortName("ng").create();
Option typeOpt = obuilder.withLongName("classifierType").withRequired(true).withArgument(
- abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()).
- withDescription("Type of classifier").withShortName("type").create();
+ abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()).
+ withDescription("Type of classifier").withShortName("type").create();
Group options = gbuilder.withName("Options").withOption(pathOpt).withOption(classifyOpt).withOption(encodingOpt).withOption(analyzerOpt).withOption(defaultCatOpt).withOption(gramSizeOpt).withOption(typeOpt).create();
@@ -159,11 +158,10 @@
log.info("Converting input document to proper format");
String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(new FileInputStream(docPath), Charset.forName(encoding)));
StringBuilder line = new StringBuilder();
- for(String token : document)
- {
+ for (String token : document) {
line.append(token).append(' ');
}
- List<String> doc = Model.generateNGramsWithoutLabel(line.toString(), gramSize) ;
+ List<String> doc = Model.generateNGramsWithoutLabel(line.toString(), gramSize);
log.info("Done converting");
log.info("Classifying document: {}", docPath);
ClassifierResult category = classifier.classify(model, doc.toArray(new String[doc.size()]), defaultCat);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Fri Jul 10 09:35:19 2009
@@ -56,11 +56,11 @@
return labels;
}
- public double getAccuracy(String label){
+ public double getAccuracy(String label) {
int labelId = labelMap.get(label);
int labelTotal = 0;
int correct = 0;
- for(int i = 0 ;i < labels.size() ;i++){
+ for (int i = 0; i < labels.size(); i++) {
labelTotal += confusionMatrix[labelId][i];
if (i == labelId) {
correct = confusionMatrix[labelId][i];
@@ -69,33 +69,33 @@
return 100.0 * correct / labelTotal;
}
- public int getCorrect(String label){
+ public int getCorrect(String label) {
int labelId = labelMap.get(label);
return confusionMatrix[labelId][labelId];
}
- public double getTotal(String label){
+ public double getTotal(String label) {
int labelId = labelMap.get(label);
int labelTotal = 0;
- for (int i = 0 ;i < labels.size() ;i++){
- labelTotal+= confusionMatrix[labelId][i];
+ for (int i = 0; i < labels.size(); i++) {
+ labelTotal += confusionMatrix[labelId][i];
}
return labelTotal;
}
public void addInstance(String correctLabel, ClassifierResult classifiedResult) {
incrementCount(correctLabel, classifiedResult.getLabel());
- }
-
+ }
+
public void addInstance(String correctLabel, String classifiedLabel) {
incrementCount(correctLabel, classifiedLabel);
}
-
+
public int getCount(String correctLabel, String classifiedLabel) {
if (labels.contains(correctLabel)
&& labels.contains(classifiedLabel) == false && defaultLabel.equals(classifiedLabel) == false) {
- throw new IllegalArgumentException("Label not found " +correctLabel + ' ' +classifiedLabel );
+ throw new IllegalArgumentException("Label not found " + correctLabel + ' ' + classifiedLabel);
}
int correctId = labelMap.get(correctLabel);
int classifiedId = labelMap.get(classifiedLabel);
@@ -113,7 +113,7 @@
}
public void incrementCount(String correctLabel, String classifiedLabel,
- int count) {
+ int count) {
putCount(correctLabel, classifiedLabel, count
+ getCount(correctLabel, classifiedLabel));
}
@@ -123,8 +123,9 @@
}
public ConfusionMatrix merge(ConfusionMatrix b) {
- if (labels.size() != b.getLabels().size())
+ if (labels.size() != b.getLabels().size()) {
throw new IllegalArgumentException("The Labels do not Match");
+ }
//if (labels.containsAll(b.getLabels()))
// ;
@@ -172,12 +173,12 @@
static String getSmallLabel(int i) {
int val = i;
StringBuilder returnString = new StringBuilder();
- do{
+ do {
int n = val % 26;
int c = 'a';
- returnString.insert(0, (char)(c + n));
+ returnString.insert(0, (char) (c + n));
val /= 26;
- }while(val>0);
+ } while (val > 0);
return returnString.toString();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Fri Jul 10 09:35:19 2009
@@ -43,14 +43,16 @@
confusionMatrix = new ConfusionMatrix(labelSet, defaultLabel);
}
- public ConfusionMatrix getConfusionMatrix(){
+ public ConfusionMatrix getConfusionMatrix() {
return this.confusionMatrix;
}
+
public void addInstance(String correctLabel, ClassifierResult classifiedResult) {
- if (correctLabel.equals(classifiedResult.getLabel()))
+ if (correctLabel.equals(classifiedResult.getLabel())) {
correctlyClassified++;
- else
+ } else {
incorrectlyClassified++;
+ }
confusionMatrix.addInstance(correctLabel, classifiedResult);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java Fri Jul 10 09:35:19 2009
@@ -23,15 +23,12 @@
import org.apache.mahout.common.Model;
import java.util.Collection;
+import java.util.Deque;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
-import java.util.Deque;
-
-/**
- * Classifies documents based on a {@link BayesModel}}.
- */
+/** Classifies documents based on a {@link BayesModel}}. */
public class BayesClassifier implements Classifier {
/**
@@ -40,17 +37,17 @@
* @param model The model
* @param document The document to classify
* @param defaultCategory The default category to assign
- * @param numResults The maximum number of results to return, ranked by score.
- * Ties are broken by comparing the category
- * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s.
+ * @param numResults The maximum number of results to return, ranked by score. Ties are broken by comparing the
+ * category
+ * @return A Collection of {@link ClassifierResult}s.
*/
@Override
public Collection<ClassifierResult> classify(Model model, String[] document, String defaultCategory, int numResults) {
Collection<String> categories = model.getLabels();
-
+
PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(numResults);
ClassifierResult tmp;
- for (String category : categories){
+ for (String category : categories) {
double prob = documentWeight(model, category, document);
if (prob > 0.0) {
tmp = new ClassifierResult(category, prob);
@@ -62,7 +59,7 @@
while ((tmp = pq.pop()) != null) {
result.addLast(tmp);
}
- if (result.isEmpty()){
+ if (result.isEmpty()) {
result.add(new ClassifierResult(defaultCategory, 0));
}
return result;
@@ -94,12 +91,12 @@
}
/**
- * Calculate the document weight as the multiplication of the
- * {@link org.apache.mahout.common.Model#featureWeight(String, String)} for each word given the label
+ * Calculate the document weight as the multiplication of the {@link Model#featureWeight(String,
+ * String)} for each word given the label
*
- * @param model The {@link org.apache.mahout.common.Model}
- * @param label The label to calculate the probability of
- * @param document The document
+ * @param model The {@link Model}
+ * @param label The label to calculate the probability of
+ * @param document The document
* @return The probability
* @see Model#featureWeight(String, String)
*/
@@ -109,7 +106,7 @@
for (String word : document) {
int[] count = wordList.get(word);
if (count == null) {
- count = new int[] { 0 };
+ count = new int[]{0};
wordList.put(word, count);
}
count[0]++;
@@ -123,7 +120,6 @@
return result;
}
-
private static class ClassifierResultPriorityQueue extends PriorityQueue<ClassifierResult> {
private ClassifierResultPriorityQueue(int numResults) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java Fri Jul 10 09:35:19 2009
@@ -28,9 +28,7 @@
import java.io.IOException;
-/**
- * Create and run the Bayes Trainer.
- */
+/** Create and run the Bayes Trainer. */
public class BayesDriver {
private static final Logger log = LoggerFactory.getLogger(BayesDriver.class);
@@ -39,15 +37,10 @@
}
/**
- * Takes in two arguments:
- * <ol>
- * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents
- * live</li>
- * <li>The output {@link org.apache.hadoop.fs.Path} where to write the
- * {@link org.apache.mahout.common.Model} as a
- * {@link org.apache.hadoop.io.SequenceFile}</li>
- * </ol>
- *
+ * Takes in two arguments: <ol> <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
+ * <li>The output {@link org.apache.hadoop.fs.Path} where to write the {@link org.apache.mahout.common.Model} as a
+ * {@link org.apache.hadoop.io.SequenceFile}</li> </ol>
+ *
* @param args The args
*/
public static void main(String[] args) throws IOException {
@@ -59,16 +52,17 @@
/**
* Run the job
- *
- * @param input the input pathname String
+ *
+ * @param input the input pathname String
* @param output the output pathname String
*/
public static void runJob(String input, String output, int gramSize) throws IOException {
JobConf conf = new JobConf(BayesDriver.class);
Path outPath = new Path(output);
FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
- if (dfs.exists(outPath))
+ if (dfs.exists(outPath)) {
dfs.delete(outPath, true);
+ }
log.info("Reading features...");
//Read the features in each document normalized by length of each document
@@ -92,30 +86,36 @@
//Calculate the normalization factor Sigma_W_ij for each complement class.
//CBayesNormalizedWeightDriver.runJob(input, output);
- Path docCountOutPath = new Path(output+ "/trainer-docCount");
- if (dfs.exists(docCountOutPath))
+ Path docCountOutPath = new Path(output + "/trainer-docCount");
+ if (dfs.exists(docCountOutPath)) {
dfs.delete(docCountOutPath, true);
- Path termDocCountOutPath = new Path(output+ "/trainer-termDocCount");
- if (dfs.exists(termDocCountOutPath))
+ }
+ Path termDocCountOutPath = new Path(output + "/trainer-termDocCount");
+ if (dfs.exists(termDocCountOutPath)) {
dfs.delete(termDocCountOutPath, true);
- Path featureCountOutPath = new Path(output+ "/trainer-featureCount");
- if (dfs.exists(featureCountOutPath))
+ }
+ Path featureCountOutPath = new Path(output + "/trainer-featureCount");
+ if (dfs.exists(featureCountOutPath)) {
dfs.delete(featureCountOutPath, true);
- Path wordFreqOutPath = new Path(output+ "/trainer-wordFreq");
- if (dfs.exists(wordFreqOutPath))
+ }
+ Path wordFreqOutPath = new Path(output + "/trainer-wordFreq");
+ if (dfs.exists(wordFreqOutPath)) {
dfs.delete(wordFreqOutPath, true);
- Path vocabCountPath = new Path(output+ "/trainer-tfIdf/trainer-vocabCount");
- if (dfs.exists(vocabCountPath))
+ }
+ Path vocabCountPath = new Path(output + "/trainer-tfIdf/trainer-vocabCount");
+ if (dfs.exists(vocabCountPath)) {
dfs.delete(vocabCountPath, true);
+ }
/*Path tfIdfOutPath = new Path(output+ "/trainer-tfIdf");
if (dfs.exists(tfIdfOutPath))
dfs.delete(tfIdfOutPath, true);*/
- Path vocabCountOutPath = new Path(output+ "/trainer-vocabCount");
- if (dfs.exists(vocabCountOutPath))
+ Path vocabCountOutPath = new Path(output + "/trainer-vocabCount");
+ if (dfs.exists(vocabCountOutPath)) {
dfs.delete(vocabCountOutPath, true);
- /* Path weightsOutPath = new Path(output+ "/trainer-weights");
- if (dfs.exists(weightsOutPath))
- dfs.delete(weightsOutPath, true);*/
+ }
+ /* Path weightsOutPath = new Path(output+ "/trainer-weights");
+ if (dfs.exists(weightsOutPath))
+ dfs.delete(weightsOutPath, true);*/
/*Path thetaOutPath = new Path(output+ "/trainer-theta");
if (dfs.exists(thetaOutPath))
dfs.delete(thetaOutPath, true);*/
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java Fri Jul 10 09:35:19 2009
@@ -30,21 +30,20 @@
@Override
protected double getWeight(Integer label, Integer feature) {
double result = 0.0;
- Map<Integer,Double> featureWeights = featureLabelWeights.get(feature);
+ Map<Integer, Double> featureWeights = featureLabelWeights.get(feature);
+
-
if (featureWeights.containsKey(label)) {
result = featureWeights.get(label).floatValue();
}
-
+
double vocabCount = featureList.size();
double sumLabelWeight = getSumLabelWeight(label);
+ double numerator = result + alpha_i;
+ double denominator = (sumLabelWeight + vocabCount);
- double numerator = result + alpha_i;
- double denominator =(sumLabelWeight + vocabCount);
-
- double weight = Math.log(numerator /denominator);
+ double weight = Math.log(numerator / denominator);
result = -weight;
return result;
@@ -53,7 +52,7 @@
@Override
protected double getWeightUnprocessed(Integer label, Integer feature) {
double result;
- Map<Integer,Double> featureWeights = featureLabelWeights.get(feature);
+ Map<Integer, Double> featureWeights = featureLabelWeights.get(feature);
if (featureWeights.containsKey(label)) {
result = featureWeights.get(label);
@@ -85,65 +84,67 @@
@Override
public void generateModel() {
- double vocabCount = featureList.size();
+ double vocabCount = featureList.size();
- double[] perLabelThetaNormalizer = new double[labelList.size()];
+ double[] perLabelThetaNormalizer = new double[labelList.size()];
for (int feature = 0, maxFeatures = featureList.size(); feature < maxFeatures; feature++) {
- Integer featureInt = feature;
- for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
+ Integer featureInt = feature;
+ for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
- Integer labelInt = label;
- double D_ij = getWeightUnprocessed(labelInt, featureInt);
- double sumLabelWeight = getSumLabelWeight(labelInt);
- //double sigma_j = getSumFeatureWeight(featureInt);
+ Integer labelInt = label;
+ double D_ij = getWeightUnprocessed(labelInt, featureInt);
+ double sumLabelWeight = getSumLabelWeight(labelInt);
+ //double sigma_j = getSumFeatureWeight(featureInt);
- double numerator = D_ij + alpha_i;
- double denominator = sumLabelWeight + vocabCount;
+ double numerator = D_ij + alpha_i;
+ double denominator = sumLabelWeight + vocabCount;
- double weight = Math.log(numerator / denominator);
+ double weight = Math.log(numerator / denominator);
- if (D_ij != 0)
- setWeight(labelInt, featureInt, weight);
+ if (D_ij != 0) {
+ setWeight(labelInt, featureInt, weight);
+ }
- perLabelThetaNormalizer[label] += weight;
+ perLabelThetaNormalizer[label] += weight;
- }
}
- log.info("Normalizing Weights");
+ }
+ log.info("Normalizing Weights");
double perLabelWeightSumNormalisationFactor = Double.MAX_VALUE;
for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
- double Sigma_W_ij = perLabelThetaNormalizer[label];
- if (perLabelWeightSumNormalisationFactor > Math.abs(Sigma_W_ij)) {
- perLabelWeightSumNormalisationFactor = Math.abs(Sigma_W_ij);
- }
+ double Sigma_W_ij = perLabelThetaNormalizer[label];
+ if (perLabelWeightSumNormalisationFactor > Math.abs(Sigma_W_ij)) {
+ perLabelWeightSumNormalisationFactor = Math.abs(Sigma_W_ij);
}
+ }
- for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
- double Sigma_W_ij = perLabelThetaNormalizer[label];
- perLabelThetaNormalizer[label] = Sigma_W_ij
- / perLabelWeightSumNormalisationFactor;
- }
+ for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
+ double Sigma_W_ij = perLabelThetaNormalizer[label];
+ perLabelThetaNormalizer[label] = Sigma_W_ij
+ / perLabelWeightSumNormalisationFactor;
+ }
- for (int feature = 0, maxFeatures = featureList.size(); feature < maxFeatures; feature++) {
- Integer featureInt = feature;
- for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
- Integer labelInt = label;
- double W_ij = getWeightUnprocessed(labelInt, featureInt);
- if (W_ij == 0)
- continue;
- double Sigma_W_ij = perLabelThetaNormalizer[label];
- double normalizedWeight = -W_ij / Sigma_W_ij;
- setWeight(labelInt, featureInt, normalizedWeight);
+ for (int feature = 0, maxFeatures = featureList.size(); feature < maxFeatures; feature++) {
+ Integer featureInt = feature;
+ for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
+ Integer labelInt = label;
+ double W_ij = getWeightUnprocessed(labelInt, featureInt);
+ if (W_ij == 0) {
+ continue;
}
+ double Sigma_W_ij = perLabelThetaNormalizer[label];
+ double normalizedWeight = -W_ij / Sigma_W_ij;
+ setWeight(labelInt, featureInt, normalizedWeight);
}
+ }
}
/**
* Get the weighted probability of the feature.
- *
- * @param label The label of the feature
+ *
+ * @param label The label of the feature
* @param feature The feature to calc. the prob. for
* @return The weighted probability
*/