You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/15 19:18:05 UTC
svn commit: r910282 [2/6] - in /lucene/mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/cooccurence/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/
core/src/main/java/o...
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java Mon Feb 15 18:17:54 2010
@@ -234,7 +234,7 @@
}
private void buildAverageDiffs() throws TasteException {
- MemoryDiffStorage.log.info("Building average diffs...");
+ log.info("Building average diffs...");
try {
buildAverageDiffsLock.writeLock().lock();
averageDiffs.clear();
@@ -289,7 +289,7 @@
}
private long processOneUser(long averageCount, long userID) throws TasteException {
- MemoryDiffStorage.log.debug("Processing prefs for user {}", userID);
+ log.debug("Processing prefs for user {}", userID);
// Save off prefs for the life of this loop iteration
PreferenceArray userPreferences = dataModel.getPreferencesFromUser(userID);
int length = userPreferences.length();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/SlopeOneRecommender.java Mon Feb 15 18:17:54 2010
@@ -111,7 +111,7 @@
throw new IllegalArgumentException("howMany must be at least 1");
}
- SlopeOneRecommender.log.debug("Recommending items for user ID '{}'", userID);
+ log.debug("Recommending items for user ID '{}'", userID);
FastIDSet possibleItemIDs = diffStorage.getRecommendableItemIDs(userID);
@@ -120,7 +120,7 @@
List<RecommendedItem> topItems = TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer,
estimator);
- SlopeOneRecommender.log.debug("Recommendations are: {}", topItems);
+ log.debug("Recommendations are: {}", topItems);
return topItems;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java Mon Feb 15 18:17:54 2010
@@ -87,7 +87,7 @@
throw new IllegalArgumentException("maxEntries must be positive");
}
- FileDiffStorage.log.info("Creating FileDataModel for file {}", dataFile);
+ log.info("Creating FileDataModel for file {}", dataFile);
this.dataFile = dataFile.getAbsoluteFile();
this.lastModified = dataFile.lastModified();
@@ -120,7 +120,7 @@
updateAllRecommendableItems();
} catch (IOException ioe) {
- FileDiffStorage.log.warn("Exception while reloading", ioe);
+ log.warn("Exception while reloading", ioe);
} finally {
buildAverageDiffsLock.writeLock().unlock();
}
@@ -316,7 +316,7 @@
public void refresh(Collection<Refreshable> alreadyRefreshed) {
long mostRecentModification = dataFile.lastModified();
if (mostRecentModification > lastModified + FileDiffStorage.MIN_RELOAD_INTERVAL_MS) {
- FileDiffStorage.log.debug("File has changed; reloading...");
+ log.debug("File has changed; reloading...");
lastModified = mostRecentModification;
buildDiffs();
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/AbstractJDBCDiffStorage.java Mon Feb 15 18:17:54 2010
@@ -117,9 +117,9 @@
});
refreshHelper.addDependency(dataModel);
if (isDiffsExist()) {
- AbstractJDBCDiffStorage.log.info("Diffs already exist in database; using them instead of recomputing");
+ log.info("Diffs already exist in database; using them instead of recomputing");
} else {
- AbstractJDBCDiffStorage.log.info("No diffs exist in database; recomputing...");
+ log.info("No diffs exist in database; recomputing...");
buildAverageDiffs();
}
}
@@ -138,11 +138,11 @@
stmt.setLong(2, itemID2);
stmt.setLong(3, itemID2);
stmt.setLong(4, itemID1);
- AbstractJDBCDiffStorage.log.debug("Executing SQL query: {}", getDiffSQL);
+ log.debug("Executing SQL query: {}", getDiffSQL);
rs = stmt.executeQuery();
return rs.next() ? new FixedRunningAverage(rs.getInt(1), rs.getDouble(2)) : null;
} catch (SQLException sqle) {
- AbstractJDBCDiffStorage.log.warn("Exception while retrieving diff", sqle);
+ log.warn("Exception while retrieving diff", sqle);
throw new TasteException(sqle);
} finally {
IOUtils.quietClose(rs, stmt, conn);
@@ -163,7 +163,7 @@
stmt.setFetchSize(getFetchSize());
stmt.setLong(1, itemID);
stmt.setLong(2, userID);
- AbstractJDBCDiffStorage.log.debug("Executing SQL query: {}", getDiffsSQL);
+ log.debug("Executing SQL query: {}", getDiffsSQL);
rs = stmt.executeQuery();
// We should have up to one result for each Preference in prefs
// They are both ordered by item. Step through and create a RunningAverage[]
@@ -179,7 +179,7 @@
i++;
}
} catch (SQLException sqle) {
- AbstractJDBCDiffStorage.log.warn("Exception while retrieving diff", sqle);
+ log.warn("Exception while retrieving diff", sqle);
throw new TasteException(sqle);
} finally {
IOUtils.quietClose(rs, stmt, conn);
@@ -199,7 +199,7 @@
stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
stmt.setFetchSize(getFetchSize());
stmt.setLong(1, itemID);
- AbstractJDBCDiffStorage.log.debug("Executing SQL query: {}", getAverageItemPrefSQL);
+ log.debug("Executing SQL query: {}", getAverageItemPrefSQL);
rs = stmt.executeQuery();
if (rs.next()) {
int count = rs.getInt(1);
@@ -209,7 +209,7 @@
}
return null;
} catch (SQLException sqle) {
- AbstractJDBCDiffStorage.log.warn("Exception while retrieving average item pref", sqle);
+ log.warn("Exception while retrieving average item pref", sqle);
throw new TasteException(sqle);
} finally {
IOUtils.quietClose(rs, stmt, conn);
@@ -222,14 +222,14 @@
try {
conn = dataSource.getConnection();
if (remove) {
- AbstractJDBCDiffStorage.doPartialUpdate(removeDiffSQLs[0], itemID, prefDelta, conn);
- AbstractJDBCDiffStorage.doPartialUpdate(removeDiffSQLs[1], itemID, prefDelta, conn);
+ doPartialUpdate(removeDiffSQLs[0], itemID, prefDelta, conn);
+ doPartialUpdate(removeDiffSQLs[1], itemID, prefDelta, conn);
} else {
- AbstractJDBCDiffStorage.doPartialUpdate(updateDiffSQLs[0], itemID, prefDelta, conn);
- AbstractJDBCDiffStorage.doPartialUpdate(updateDiffSQLs[1], itemID, prefDelta, conn);
+ doPartialUpdate(updateDiffSQLs[0], itemID, prefDelta, conn);
+ doPartialUpdate(updateDiffSQLs[1], itemID, prefDelta, conn);
}
} catch (SQLException sqle) {
- AbstractJDBCDiffStorage.log.warn("Exception while updating item diff", sqle);
+ log.warn("Exception while updating item diff", sqle);
throw new TasteException(sqle);
} finally {
IOUtils.quietClose(conn);
@@ -241,7 +241,7 @@
try {
stmt.setDouble(1, prefDelta);
stmt.setLong(2, itemID);
- AbstractJDBCDiffStorage.log.debug("Executing SQL update: {}", sql);
+ log.debug("Executing SQL update: {}", sql);
stmt.executeUpdate();
} finally {
IOUtils.quietClose(stmt);
@@ -262,7 +262,7 @@
stmt.setLong(1, userID);
stmt.setLong(2, userID);
stmt.setLong(3, userID);
- AbstractJDBCDiffStorage.log.debug("Executing SQL query: {}", getRecommendableItemsSQL);
+ log.debug("Executing SQL query: {}", getRecommendableItemsSQL);
rs = stmt.executeQuery();
FastIDSet itemIDs = new FastIDSet();
while (rs.next()) {
@@ -270,7 +270,7 @@
}
return itemIDs;
} catch (SQLException sqle) {
- AbstractJDBCDiffStorage.log.warn("Exception while retrieving recommendable items", sqle);
+ log.warn("Exception while retrieving recommendable items", sqle);
throw new TasteException(sqle);
} finally {
IOUtils.quietClose(rs, stmt, conn);
@@ -284,7 +284,7 @@
PreparedStatement stmt = null;
try {
stmt = conn.prepareStatement(deleteDiffsSQL);
- AbstractJDBCDiffStorage.log.debug("Executing SQL update: {}", deleteDiffsSQL);
+ log.debug("Executing SQL update: {}", deleteDiffsSQL);
stmt.executeUpdate();
} finally {
IOUtils.quietClose(stmt);
@@ -292,13 +292,13 @@
try {
stmt = conn.prepareStatement(createDiffsSQL);
stmt.setInt(1, minDiffCount);
- AbstractJDBCDiffStorage.log.debug("Executing SQL update: {}", createDiffsSQL);
+ log.debug("Executing SQL update: {}", createDiffsSQL);
stmt.executeUpdate();
} finally {
IOUtils.quietClose(stmt);
}
} catch (SQLException sqle) {
- AbstractJDBCDiffStorage.log.warn("Exception while updating/deleting diffs", sqle);
+ log.warn("Exception while updating/deleting diffs", sqle);
throw new TasteException(sqle);
} finally {
IOUtils.quietClose(conn);
@@ -314,12 +314,12 @@
stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
stmt.setFetchSize(getFetchSize());
- AbstractJDBCDiffStorage.log.debug("Executing SQL query: {}", diffsExistSQL);
+ log.debug("Executing SQL query: {}", diffsExistSQL);
rs = stmt.executeQuery(diffsExistSQL);
rs.next();
return rs.getInt(1) > 0;
} catch (SQLException sqle) {
- AbstractJDBCDiffStorage.log.warn("Exception while deleting diffs", sqle);
+ log.warn("Exception while deleting diffs", sqle);
throw new TasteException(sqle);
} finally {
IOUtils.quietClose(rs, stmt, conn);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java Mon Feb 15 18:17:54 2010
@@ -87,10 +87,8 @@
private static final int DEFAULT_MIN_DIFF_COUNT = 2;
public MySQLJDBCDiffStorage(AbstractJDBCDataModel dataModel) throws TasteException {
- this(dataModel, AbstractJDBCDiffStorage.DEFAULT_DIFF_TABLE,
- AbstractJDBCDiffStorage.DEFAULT_ITEM_A_COLUMN, AbstractJDBCDiffStorage.DEFAULT_ITEM_B_COLUMN,
- AbstractJDBCDiffStorage.DEFAULT_COUNT_COLUMN, AbstractJDBCDiffStorage.DEFAULT_AVERAGE_DIFF_COLUMN,
- MySQLJDBCDiffStorage.DEFAULT_MIN_DIFF_COUNT);
+ this(dataModel, DEFAULT_DIFF_TABLE, DEFAULT_ITEM_A_COLUMN, DEFAULT_ITEM_B_COLUMN, DEFAULT_COUNT_COLUMN,
+ DEFAULT_AVERAGE_DIFF_COLUMN, DEFAULT_MIN_DIFF_COUNT);
}
public MySQLJDBCDiffStorage(AbstractJDBCDataModel dataModel,
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java Mon Feb 15 18:17:54 2010
@@ -177,7 +177,7 @@
throw new IllegalArgumentException("howMany must be at least 1");
}
- SVDRecommender.log.debug("Recommending items for user ID '{}'", userID);
+ log.debug("Recommending items for user ID '{}'", userID);
FastIDSet possibleItemIDs = getAllOtherItems(userID);
@@ -186,7 +186,7 @@
List<RecommendedItem> topItems = TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer,
estimator);
- SVDRecommender.log.debug("Recommendations are: {}", topItems);
+ log.debug("Recommendations are: {}", topItems);
return topItems;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java Mon Feb 15 18:17:54 2010
@@ -136,7 +136,8 @@
* @throws TasteException
* if an error occurs while accessing the {@link DataModel} items
*/
- public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel, int maxToKeep) throws TasteException {
+ public GenericItemSimilarity(ItemSimilarity otherSimilarity,
+ DataModel dataModel, int maxToKeep) throws TasteException {
long[] itemIDs = IteratorUtils.longIteratorToList(dataModel.getItemIDs());
Iterator<ItemItemSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, itemIDs);
Iterable<ItemItemSimilarity> keptSimilarities = TopItems.getTopItemItemSimilarities(maxToKeep,
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java Mon Feb 15 18:17:54 2010
@@ -52,7 +52,8 @@
initSimilarityMaps(new IteratorIterable<UserUserSimilarity>(it));
}
- public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel, int maxToKeep) throws TasteException {
+ public GenericUserSimilarity(UserSimilarity otherSimilarity,
+ DataModel dataModel, int maxToKeep) throws TasteException {
long[] userIDs = IteratorUtils.longIteratorToList(dataModel.getUserIDs());
Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, userIDs);
Iterable<UserUserSimilarity> keptSimilarities = TopItems.getTopUserUserSimilarities(maxToKeep,
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/LogLikelihoodSimilarity.java Mon Feb 15 18:17:54 2010
@@ -83,8 +83,9 @@
static double twoLogLambda(double k1, double k2, double n1, double n2) {
double p = (k1 + k2) / (n1 + n2);
return 2.0 * (LogLikelihoodSimilarity.logL(k1 / n1, k1, n1)
- + LogLikelihoodSimilarity.logL(k2 / n2, k2, n2) - LogLikelihoodSimilarity.logL(p, k1, n1) - LogLikelihoodSimilarity
- .logL(p, k2, n2));
+ + LogLikelihoodSimilarity.logL(k2 / n2, k2, n2)
+ - LogLikelihoodSimilarity.logL(p, k1, n1)
+ - LogLikelihoodSimilarity.logL(p, k2, n2));
}
private static double logL(double p, double k, double n) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/AbstractJDBCItemSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/AbstractJDBCItemSimilarity.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/AbstractJDBCItemSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/AbstractJDBCItemSimilarity.java Mon Feb 15 18:17:54 2010
@@ -55,10 +55,8 @@
private final String getItemItemSimilaritySQL;
protected AbstractJDBCItemSimilarity(DataSource dataSource, String getItemItemSimilaritySQL) {
- this(dataSource, AbstractJDBCItemSimilarity.DEFAULT_SIMILARITY_TABLE,
- AbstractJDBCItemSimilarity.DEFAULT_ITEM_A_ID_COLUMN,
- AbstractJDBCItemSimilarity.DEFAULT_ITEM_B_ID_COLUMN,
- AbstractJDBCItemSimilarity.DEFAULT_SIMILARITY_COLUMN, getItemItemSimilaritySQL);
+ this(dataSource, DEFAULT_SIMILARITY_TABLE, DEFAULT_ITEM_A_ID_COLUMN, DEFAULT_ITEM_B_ID_COLUMN,
+ DEFAULT_SIMILARITY_COLUMN, getItemItemSimilaritySQL);
}
protected AbstractJDBCItemSimilarity(DataSource dataSource,
@@ -75,7 +73,7 @@
AbstractJDBCComponent.checkNotNullAndLog("getItemItemSimilaritySQL", getItemItemSimilaritySQL);
if (!(dataSource instanceof ConnectionPoolDataSource)) {
- AbstractJDBCItemSimilarity.log
+ log
.warn("You are not using ConnectionPoolDataSource. Make sure your DataSource pools connections "
+ "to the database itself, or database performance will be severely reduced.");
}
@@ -130,7 +128,7 @@
stmt.setLong(1, itemID1);
stmt.setLong(2, itemID2);
- AbstractJDBCItemSimilarity.log.debug("Executing SQL query: {}", getItemItemSimilaritySQL);
+ log.debug("Executing SQL query: {}", getItemItemSimilaritySQL);
rs = stmt.executeQuery();
if (rs.next()) {
@@ -140,7 +138,7 @@
}
} catch (SQLException sqle) {
- AbstractJDBCItemSimilarity.log.warn("Exception while retrieving user", sqle);
+ log.warn("Exception while retrieving user", sqle);
throw new TasteException(sqle);
} finally {
IOUtils.quietClose(rs, stmt, conn);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Mon Feb 15 18:17:54 2010
@@ -72,7 +72,8 @@
* @param outputFile
* The file to collapse to
*/
- public static void collapse(String label, Analyzer analyzer, File inputDir, Charset charset, File outputFile) throws IOException {
+ public static void collapse(String label, Analyzer analyzer, File inputDir,
+ Charset charset, File outputFile) throws IOException {
Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile), charset);
try {
inputDir.listFiles(new FileProcessor(label, analyzer, charset, writer));
@@ -97,7 +98,8 @@
* @param outDir
* The output directory. Files will be written there with the same name as the input file
*/
- public static void format(String label, Analyzer analyzer, File input, Charset charset, File outDir) throws IOException {
+ public static void format(String label, Analyzer analyzer, File input,
+ Charset charset, File outDir) throws IOException {
if (input.isDirectory()) {
input.listFiles(new FileProcessor(label, analyzer, charset, outDir));
} else {
@@ -111,7 +113,8 @@
}
/**
- * Hack the FileFilter mechanism so that we don't get stuck on large directories and don't have to loop the list twice
+ * Hack the FileFilter mechanism so that we don't get stuck on large directories and
+ * don't have to loop the list twice
*/
private static final class FileProcessor implements FileFilter {
private final String label;
@@ -198,7 +201,8 @@
* @throws java.io.IOException
* if there was a problem w/ the reader
*/
- private static void writeFile(String label, Analyzer analyzer, File inFile, Charset charset, Writer writer) throws IOException {
+ private static void writeFile(String label, Analyzer analyzer, File inFile,
+ Charset charset, Writer writer) throws IOException {
Reader reader = new InputStreamReader(new FileInputStream(inFile), charset);
try {
TokenStream ts = analyzer.tokenStream(label, reader);
@@ -322,7 +326,7 @@
}
} catch (OptionException e) {
- BayesFileFormatter.log.error("Exception", e);
+ log.error("Exception", e);
}
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Mon Feb 15 18:17:54 2010
@@ -111,7 +111,7 @@
String modelBasePath = (String) cmdLine.getValue(pathOpt);
- Classify.log.info("Loading model from: {}", params.print());
+ log.info("Loading model from: {}", params.print());
Algorithm algorithm;
Datastore datastore;
@@ -121,11 +121,11 @@
String dataSource = (String) cmdLine.getValue(dataSourceOpt);
if (dataSource.equals("hdfs")) {
if (classifierType.equalsIgnoreCase("bayes")) {
- Classify.log.info("Using Bayes Classifier");
+ log.info("Using Bayes Classifier");
algorithm = new BayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
} else if (classifierType.equalsIgnoreCase("cbayes")) {
- Classify.log.info("Using Complementary Bayes Classifier");
+ log.info("Using Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
} else {
@@ -134,11 +134,11 @@
} else if (dataSource.equals("hbase")) {
if (classifierType.equalsIgnoreCase("bayes")) {
- Classify.log.info("Using Bayes Classifier");
+ log.info("Using Bayes Classifier");
algorithm = new BayesAlgorithm();
datastore = new HBaseBayesDatastore(modelBasePath, params);
} else if (classifierType.equalsIgnoreCase("cbayes")) {
- Classify.log.info("Using Complementary Bayes Classifier");
+ log.info("Using Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
datastore = new HBaseBayesDatastore(modelBasePath, params);
} else {
@@ -168,7 +168,7 @@
analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
}
- Classify.log.info("Converting input document to proper format");
+ log.info("Converting input document to proper format");
String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(
new FileInputStream(docPath), Charset.forName(encoding)));
StringBuilder line = new StringBuilder();
@@ -178,10 +178,10 @@
List<String> doc = new NGrams(line.toString(), gramSize).generateNGramsWithoutLabel();
- Classify.log.info("Done converting");
- Classify.log.info("Classifying document: {}", docPath);
+ log.info("Done converting");
+ log.info("Classifying document: {}", docPath);
ClassifierResult category = classifier.classifyDocument(doc.toArray(new String[doc.size()]), defaultCat);
- Classify.log.info("Category for {} is {}", docPath, category);
+ log.info("Category for {} is {}", docPath, category);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java Mon Feb 15 18:17:54 2010
@@ -180,7 +180,7 @@
}
public static void classifySequential(BayesParameters params) throws IOException, InvalidDatastoreException {
- TestClassifier.log.info("Loading model from: {}", params.print());
+ log.info("Loading model from: {}", params.print());
boolean verbose = Boolean.valueOf(params.get("verbose"));
File dir = new File(params.get("testDirPath"));
File[] subdirs = dir.listFiles(new FilenameFilter() {
@@ -195,11 +195,11 @@
if (params.get("dataSource").equals("hdfs")) {
if (params.get("classifierType").equalsIgnoreCase("bayes")) {
- TestClassifier.log.info("Testing Bayes Classifier");
+ log.info("Testing Bayes Classifier");
algorithm = new BayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
} else if (params.get("classifierType").equalsIgnoreCase("cbayes")) {
- TestClassifier.log.info("Testing Complementary Bayes Classifier");
+ log.info("Testing Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
} else {
@@ -208,11 +208,11 @@
} else if (params.get("dataSource").equals("hbase")) {
if (params.get("classifierType").equalsIgnoreCase("bayes")) {
- TestClassifier.log.info("Testing Bayes Classifier");
+ log.info("Testing Bayes Classifier");
algorithm = new BayesAlgorithm();
datastore = new HBaseBayesDatastore(params.get("basePath"), params);
} else if (params.get("classifierType").equalsIgnoreCase("cbayes")) {
- TestClassifier.log.info("Testing Complementary Bayes Classifier");
+ log.info("Testing Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
datastore = new HBaseBayesDatastore(params.get("basePath"), params);
} else {
@@ -229,8 +229,8 @@
if (subdirs != null) {
for (File file : subdirs) {
- TestClassifier.log.info("--------------");
- TestClassifier.log.info("Testing: {}", file);
+ log.info("--------------");
+ log.info("Testing: {}", file);
String correctLabel = file.getName().split(".txt")[0];
TimingStatistics operationStats = new TimingStatistics();
@@ -251,26 +251,26 @@
boolean correct = resultAnalyzer.addInstance(correctLabel, classifiedLabel);
if (verbose) {
// We have one document per line
- TestClassifier.log.info(
+ log.info(
"Line Number: {} Line(30): {} Expected Label: {} Classified Label: {} Correct: {}",
new Object[] {lineNum, line.length() > 30 ? line.substring(0, 30) : line, correctLabel,
classifiedLabel.getLabel(), correct,});
}
- // log.info("{} {}", correctLabel, classifiedLabel);
+ //log.info("{} {}", correctLabel, classifiedLabel);
}
lineNum++;
}
- log.info("{}\t{}\t{}/{}",
+ log.info("{}\t{}\t{}/{}",
new Object[] {correctLabel, resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel),
resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel),
resultAnalyzer.getConfusionMatrix().getTotal(correctLabel)});
- log.info("{}", operationStats.toString());
+ log.info("{}", operationStats.toString());
}
}
- log.info("{}", totalStatistics.toString());
- log.info(resultAnalyzer.summarize());
+ log.info("{}", totalStatistics.toString());
+ log.info(resultAnalyzer.summarize());
}
public static void classifyParallel(BayesParameters params) throws IOException {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java Mon Feb 15 18:17:54 2010
@@ -118,18 +118,18 @@
}
if (classifierType.equalsIgnoreCase("bayes")) {
- TrainClassifier.log.info("Training Bayes Classifier");
+ log.info("Training Bayes Classifier");
TrainClassifier.trainNaiveBayes((String) cmdLine.getValue(inputDirOpt), (String) cmdLine
.getValue(outputOpt), params);
} else if (classifierType.equalsIgnoreCase("cbayes")) {
- TrainClassifier.log.info("Training Complementary Bayes Classifier");
+ log.info("Training Complementary Bayes Classifier");
// setup the HDFS and copy the files there, then run the trainer
TrainClassifier.trainCNaiveBayes((String) cmdLine.getValue(inputDirOpt), (String) cmdLine
.getValue(outputOpt), params);
}
} catch (OptionException e) {
- TrainClassifier.log.error("Error while parsing options", e);
+ log.error("Error while parsing options", e);
CommandLineUtil.printHelp(group);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java Mon Feb 15 18:17:54 2010
@@ -102,9 +102,7 @@
@Override
public void initialize(Datastore datastore) throws InvalidDatastoreException {
- datastore.getWeight("weight", "test", "test");
- datastore.getWeight("labelWeight", "test");
- datastore.getWeight("thetaNormalizer", "test");
+ datastore.getKeys("labelWeight");
}
@Override
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java Mon Feb 15 18:17:54 2010
@@ -111,9 +111,7 @@
@Override
public void initialize(Datastore datastore) throws InvalidDatastoreException {
- datastore.getWeight("weight", "test", "test");
- datastore.getWeight("labelWeight", "test");
- datastore.getWeight("thetaNormalizer", "test");
+ datastore.getKeys("labelWeight");
}
@Override
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java Mon Feb 15 18:17:54 2010
@@ -128,7 +128,7 @@
BayesConstants.LABEL_THETA_NORMALIZER, label)));
}
for (String label : labels) {
- HBaseBayesDatastore.log.info("{} {} {} {}", new Object[] {
+ log.info("{} {} {} {}", new Object[] {
label,
getWeightFromHbase(
BayesConstants.LABEL_THETA_NORMALIZER,
@@ -156,7 +156,7 @@
}
if (r == null) {
- HBaseBayesDatastore.log.error("Encountered NULL");
+ log.error("Encountered NULL");
throw new InvalidDatastoreException("Encountered NULL");
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java Mon Feb 15 18:17:54 2010
@@ -82,7 +82,7 @@
throw new InvalidDatastoreException(e.getMessage());
}
for (String label : getKeys("")) {
- InMemoryBayesDatastore.log.info("{} {} {} {}", new Object[] {label,
+ log.info("{} {} {} {}", new Object[] {label,
thetaNormalizerPerLabel
.get(getLabelID(label)),
thetaNormalizer,
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java Mon Feb 15 18:17:54 2010
@@ -68,7 +68,7 @@
FileStatus[] outputFiles = fs.globStatus(pathPattern);
for (FileStatus fileStatus : outputFiles) {
Path path = fileStatus.getPath();
- SequenceFileModelReader.log.info("{}", path);
+ log.info("{}", path);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
// the key is label,feature
@@ -91,7 +91,7 @@
FileStatus[] outputFiles = fs.globStatus(pathPattern);
for (FileStatus fileStatus : outputFiles) {
Path path = fileStatus.getPath();
- SequenceFileModelReader.log.info("{}", path);
+ log.info("{}", path);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
// the key is either _label_ or label,feature
@@ -102,7 +102,7 @@
datastore.setSumFeatureWeight(key.stringAt(1), value.get());
count++;
if (count % 50000 == 0) {
- SequenceFileModelReader.log.info("Read {} feature weights", count);
+ log.info("Read {} feature weights", count);
}
}
}
@@ -120,7 +120,7 @@
FileStatus[] outputFiles = fs.globStatus(pathPattern);
for (FileStatus fileStatus : outputFiles) {
Path path = fileStatus.getPath();
- SequenceFileModelReader.log.info("{}", path);
+ log.info("{}", path);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
long count = 0;
@@ -130,7 +130,7 @@
datastore.setSumLabelWeight(key.stringAt(1), value.get());
count++;
if (count % 10000 == 0) {
- SequenceFileModelReader.log.info("Read {} label weights", count);
+ log.info("Read {} label weights", count);
}
}
}
@@ -148,7 +148,7 @@
FileStatus[] outputFiles = fs.globStatus(pathPattern);
for (FileStatus fileStatus : outputFiles) {
Path path = fileStatus.getPath();
- SequenceFileModelReader.log.info("{}", path);
+ log.info("{}", path);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
long count = 0;
@@ -158,7 +158,7 @@
datastore.setThetaNormalizer(key.stringAt(1), value.get());
count++;
if (count % 50000 == 0) {
- SequenceFileModelReader.log.info("Read {} theta norms", count);
+ log.info("Read {} theta norms", count);
}
}
}
@@ -176,7 +176,7 @@
FileStatus[] outputFiles = fs.globStatus(pathPattern);
for (FileStatus fileStatus : outputFiles) {
Path path = fileStatus.getPath();
- SequenceFileModelReader.log.info("{}", path);
+ log.info("{}", path);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
// the key is _label
@@ -186,7 +186,7 @@
// weights for
// all Features and all Labels
datastore.setSigmaJSigmaK(value.get());
- SequenceFileModelReader.log.info("{}", value.get());
+ log.info("{}", value.get());
}
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java Mon Feb 15 18:17:54 2010
@@ -84,7 +84,7 @@
Path outputFiles = new Path(outPath.toString() + "/part*");
ConfusionMatrix matrix = BayesClassifierDriver.readResult(dfs, outputFiles, conf, params);
- BayesClassifierDriver.log.info("{}", matrix.summarize());
+ log.info("{}", matrix.summarize());
}
private static ConfusionMatrix readResult(FileSystem fs,
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java Mon Feb 15 18:17:54 2010
@@ -96,19 +96,19 @@
@Override
public void configure(JobConf job) {
try {
- BayesClassifierMapper.log.info("Bayes Parameter {}", job.get("bayes.parameters"));
+ log.info("Bayes Parameter {}", job.get("bayes.parameters"));
Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
- BayesClassifierMapper.log.info("{}", params.print());
+ log.info("{}", params.print());
Algorithm algorithm;
Datastore datastore;
if (params.get("dataSource").equals("hdfs")) {
if (params.get("classifierType").equalsIgnoreCase("bayes")) {
- BayesClassifierMapper.log.info("Testing Bayes Classifier");
+ log.info("Testing Bayes Classifier");
algorithm = new BayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
} else if (params.get("classifierType").equalsIgnoreCase("cbayes")) {
- BayesClassifierMapper.log.info("Testing Complementary Bayes Classifier");
+ log.info("Testing Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
datastore = new InMemoryBayesDatastore(params);
} else {
@@ -117,11 +117,11 @@
} else if (params.get("dataSource").equals("hbase")) {
if (params.get("classifierType").equalsIgnoreCase("bayes")) {
- BayesClassifierMapper.log.info("Testing Bayes Classifier");
+ log.info("Testing Bayes Classifier");
algorithm = new BayesAlgorithm();
datastore = new HBaseBayesDatastore(params.get("basePath"), params);
} else if (params.get("classifierType").equalsIgnoreCase("cbayes")) {
- BayesClassifierMapper.log.info("Testing Complementary Bayes Classifier");
+ log.info("Testing Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
datastore = new HBaseBayesDatastore(params.get("basePath"), params);
} else {
@@ -137,9 +137,9 @@
defaultCategory = params.get("defaultCat");
gramSize = Integer.valueOf(params.get("gramSize"));
} catch (IOException ex) {
- BayesClassifierMapper.log.warn(ex.toString(), ex);
+ log.warn(ex.toString(), ex);
} catch (InvalidDatastoreException e) {
- BayesClassifierMapper.log.error(e.toString(), e);
+ log.error(e.toString(), e);
}
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java Mon Feb 15 18:17:54 2010
@@ -55,23 +55,23 @@
dfs.delete(outPath, true);
}
- BayesDriver.log.info("Reading features...");
+ log.info("Reading features...");
// Read the features in each document normalized by length of each document
BayesFeatureDriver feature = new BayesFeatureDriver();
feature.runJob(input, output, params);
- BayesDriver.log.info("Calculating Tf-Idf...");
+ log.info("Calculating Tf-Idf...");
// Calculate the TfIdf for each word in each label
BayesTfIdfDriver tfidf = new BayesTfIdfDriver();
tfidf.runJob(input, output, params);
- BayesDriver.log.info("Calculating weight sums for labels and features...");
+ log.info("Calculating weight sums for labels and features...");
// Calculate the Sums of weights for each label, for each feature and for
// each feature and for each label
BayesWeightSummerDriver summer = new BayesWeightSummerDriver();
summer.runJob(input, output, params);
- BayesDriver.log.info("Calculating the weight Normalisation factor for each class...");
+ log.info("Calculating the weight Normalisation factor for each class...");
// Calculate the normalization factor Sigma_W_ij for each complement class.
BayesThetaNormalizerDriver normalizer = new BayesThetaNormalizerDriver();
normalizer.runJob(input, output, params);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java Mon Feb 15 18:17:54 2010
@@ -87,9 +87,9 @@
GenericsUtil.getClass(labelWeightSum));
String labelWeightSumString = mapStringifier.toString(labelWeightSum);
- BayesThetaNormalizerDriver.log.info("Sigma_k for Each Label");
+ log.info("Sigma_k for Each Label");
Map<String,Double> c = mapStringifier.fromString(labelWeightSumString);
- BayesThetaNormalizerDriver.log.info("{}", c);
+ log.info("{}", c);
conf.set("cnaivebayes.sigma_k", labelWeightSumString);
Path sigmaJSigmaKFile = new Path(output + "/trainer-weights/Sigma_kSigma_j/*");
@@ -97,19 +97,19 @@
DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class);
String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK);
- BayesThetaNormalizerDriver.log.info("Sigma_kSigma_j for each Label and for each Features");
+ log.info("Sigma_kSigma_j for each Label and for each Features");
double retSigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString);
- BayesThetaNormalizerDriver.log.info("{}", retSigmaJSigmaK);
+ log.info("{}", retSigmaJSigmaK);
conf.set("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString);
Path vocabCountFile = new Path(output + "/trainer-tfIdf/trainer-vocabCount/*");
double vocabCount = SequenceFileModelReader.readVocabCount(dfs, vocabCountFile, conf);
String vocabCountString = stringifier.toString(vocabCount);
- BayesThetaNormalizerDriver.log.info("Vocabulary Count");
+ log.info("Vocabulary Count");
conf.set("cnaivebayes.vocabCount", vocabCountString);
double retvocabCount = stringifier.fromString(vocabCountString);
- BayesThetaNormalizerDriver.log.info("{}", retvocabCount);
+ log.info("{}", retvocabCount);
conf.set("bayes.parameters", params.toString());
conf.set("output.table", output);
client.setConf(conf);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java Mon Feb 15 18:17:54 2010
@@ -103,7 +103,7 @@
alphaI = Double.valueOf(params.get("alpha_i", "1.0"));
} catch (IOException ex) {
- BayesThetaNormalizerMapper.log.warn(ex.toString(), ex);
+ log.warn(ex.toString(), ex);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java Mon Feb 15 18:17:54 2010
@@ -91,7 +91,7 @@
HBaseConfiguration hBconf = new HBaseConfiguration(job);
table = new HTable(hBconf, job.get("output.table"));
} catch (IOException e) {
- BayesThetaNormalizerReducer.log.error("Unexpected error during configuration", e);
+ log.error("Unexpected error during configuration", e);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java Mon Feb 15 18:17:54 2010
@@ -55,23 +55,23 @@
dfs.delete(outPath, true);
}
- CBayesDriver.log.info("Reading features...");
+ log.info("Reading features...");
// Read the features in each document normalized by length of each document
BayesFeatureDriver feature = new BayesFeatureDriver();
feature.runJob(input, output, params);
- CBayesDriver.log.info("Calculating Tf-Idf...");
+ log.info("Calculating Tf-Idf...");
// Calculate the TfIdf for each word in each label
BayesTfIdfDriver tfidf = new BayesTfIdfDriver();
tfidf.runJob(input, output, params);
- CBayesDriver.log.info("Calculating weight sums for labels and features...");
+ log.info("Calculating weight sums for labels and features...");
// Calculate the Sums of weights for each label, for each feature and for
// each feature and for each label
BayesWeightSummerDriver summer = new BayesWeightSummerDriver();
summer.runJob(input, output, params);
- CBayesDriver.log.info("Calculating the weight Normalisation factor for each complement class...");
+ log.info("Calculating the weight Normalisation factor for each complement class...");
// Calculate the normalization factor Sigma_W_ij for each complement class.
CBayesThetaNormalizerDriver normalizer = new CBayesThetaNormalizerDriver();
normalizer.runJob(input, output, params);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java Mon Feb 15 18:17:54 2010
@@ -88,9 +88,9 @@
GenericsUtil.getClass(labelWeightSum));
String labelWeightSumString = mapStringifier.toString(labelWeightSum);
- CBayesThetaNormalizerDriver.log.info("Sigma_k for Each Label");
+ log.info("Sigma_k for Each Label");
Map<String,Double> c = mapStringifier.fromString(labelWeightSumString);
- CBayesThetaNormalizerDriver.log.info("{}", c);
+ log.info("{}", c);
conf.set("cnaivebayes.sigma_k", labelWeightSumString);
Path sigmaKSigmaJFile = new Path(output + "/trainer-weights/Sigma_kSigma_j/*");
@@ -98,19 +98,19 @@
DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class);
String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK);
- CBayesThetaNormalizerDriver.log.info("Sigma_kSigma_j for each Label and for each Features");
+ log.info("Sigma_kSigma_j for each Label and for each Features");
double retSigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString);
- CBayesThetaNormalizerDriver.log.info("{}", retSigmaJSigmaK);
+ log.info("{}", retSigmaJSigmaK);
conf.set("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString);
Path vocabCountFile = new Path(output + "/trainer-tfIdf/trainer-vocabCount/*");
double vocabCount = SequenceFileModelReader.readVocabCount(dfs, vocabCountFile, conf);
String vocabCountString = stringifier.toString(vocabCount);
- CBayesThetaNormalizerDriver.log.info("Vocabulary Count");
+ log.info("Vocabulary Count");
conf.set("cnaivebayes.vocabCount", vocabCountString);
double retvocabCount = stringifier.fromString(vocabCountString);
- CBayesThetaNormalizerDriver.log.info("{}", retvocabCount);
+ log.info("{}", retvocabCount);
conf.set("bayes.parameters", params.toString());
conf.set("output.table", output);
client.setConf(conf);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java Mon Feb 15 18:17:54 2010
@@ -133,7 +133,7 @@
alphaI = Double.valueOf(params.get("alpha_i", "1.0"));
} catch (IOException ex) {
- CBayesThetaNormalizerMapper.log.warn(ex.toString(), ex);
+ log.warn(ex.toString(), ex);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java Mon Feb 15 18:17:54 2010
@@ -92,7 +92,7 @@
hBconf.set(new HBaseConfiguration(job));
table = new HTable(hBconf.get(), job.get("output.table"));
} catch (IOException e) {
- CBayesThetaNormalizerReducer.log.error("Unexpected error during configuration", e);
+ log.error("Unexpected error during configuration", e);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java Mon Feb 15 18:17:54 2010
@@ -163,12 +163,12 @@
@Override
public void configure(JobConf job) {
try {
- BayesFeatureMapper.log.info("Bayes Parameter {}", job.get("bayes.parameters"));
+ log.info("Bayes Parameter {}", job.get("bayes.parameters"));
Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
gramSize = Integer.valueOf(params.get("gramSize"));
} catch (IOException ex) {
- BayesFeatureMapper.log.warn(ex.toString(), ex);
+ log.warn(ex.toString(), ex);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java Mon Feb 15 18:17:54 2010
@@ -104,12 +104,12 @@
GenericsUtil.getClass(labelDocumentCounts));
String labelDocumentCountString = mapStringifier.toString(labelDocumentCounts);
- BayesTfIdfDriver.log.info("Counts of documents in Each Label");
+ log.info("Counts of documents in Each Label");
Map<String,Double> c = mapStringifier.fromString(labelDocumentCountString);
- BayesTfIdfDriver.log.info("{}", c);
+ log.info("{}", c);
conf.set("cnaivebayes.labelDocumentCounts", labelDocumentCountString);
- BayesTfIdfDriver.log.info(params.print());
+ log.info(params.print());
if (params.get("dataSource").equals("hbase")) {
HBaseConfiguration hc = new HBaseConfiguration(new Configuration());
HTableDescriptor ht = new HTableDescriptor(output);
@@ -120,9 +120,9 @@
hcd.setBlockCacheEnabled(true);
ht.addFamily(hcd);
- BayesTfIdfDriver.log.info("Connecting to hbase...");
+ log.info("Connecting to hbase...");
HBaseAdmin hba = new HBaseAdmin(hc);
- BayesTfIdfDriver.log.info("Creating Table {}", output);
+ log.info("Creating Table {}", output);
if (hba.tableExists(output)) {
hba.disableTable(output);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java Mon Feb 15 18:17:54 2010
@@ -77,7 +77,7 @@
}
} else if (key.length() == 2) {
if (key.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
- output.collect(BayesTfIdfMapper.VOCAB_COUNT, BayesTfIdfMapper.ONE);
+ output.collect(VOCAB_COUNT, ONE);
reporter.setStatus("Bayes TfIdf Mapper: vocabCount");
} else {
throw new IllegalArgumentException("Unexpected Tuple: " + key);
@@ -104,7 +104,7 @@
}
} catch (IOException ex) {
- BayesTfIdfMapper.log.warn(ex.toString(), ex);
+ log.warn(ex.toString(), ex);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java Mon Feb 15 18:17:54 2010
@@ -63,7 +63,7 @@
vocabCount += values.next().get();
}
- BayesTfIdfReducer.log.info("{}\t{}", key, vocabCount);
+ log.info("{}\t{}", key, vocabCount);
if (useHbase) {
Put bu = new Put(Bytes.toBytes(BayesConstants.HBASE_COUNTS_ROW));
bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
@@ -111,7 +111,7 @@
table = new HTable(hBconf, job.get("output.table"));
} catch (IOException e) {
- BayesTfIdfReducer.log.error("Unexpected error during configuration", e);
+ log.error("Unexpected error during configuration", e);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java Mon Feb 15 18:17:54 2010
@@ -102,7 +102,7 @@
HBaseConfiguration hBconf = new HBaseConfiguration(job);
table = new HTable(hBconf, job.get("output.table"));
} catch (IOException e) {
- BayesWeightSummerReducer.log.error("Unexpected error during configuration", e);
+ log.error("Unexpected error during configuration", e);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java Mon Feb 15 18:17:54 2010
@@ -76,7 +76,7 @@
job.runJob(input, output, new BayesParameters(1));
} catch (OptionException e) {
- JobExecutor.log.error(e.getMessage());
+ log.error(e.getMessage());
CommandLineUtil.printHelp(group);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/LinearModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/LinearModel.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/LinearModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/LinearModel.java Mon Feb 15 18:17:54 2010
@@ -34,7 +34,7 @@
private double bias;
/** Classification threshold. */
private final double threshold;
-
+
/**
* Init a linear model with a hyperplane, distance and displacement.
* */
@@ -43,14 +43,14 @@
this.bias = displacement;
this.threshold = threshold;
}
-
+
/**
* Init a linear model with zero displacement and a threshold of 0.5.
* */
public LinearModel(Vector hyperplane) {
this(hyperplane, 0, 0.5);
}
-
+
/**
* Classify a point to either belong to the class modeled by this linear model or not.
* @param dataPoint the data point to classify.
@@ -58,20 +58,21 @@
* */
public boolean classify(Vector dataPoint) throws CardinalityException, IndexException {
double product = this.hyperplane.dot(dataPoint);
- if (LOG.isDebugEnabled()) {
- LOG.debug("model: " + this + " product: " + product + " Bias: " + this.bias + " threshold: " + this.threshold);
+ if (LinearModel.LOG.isDebugEnabled()) {
+ LinearModel.LOG.debug("model: " + this + " product: " + product + " Bias: " + this.bias
+ + " threshold: " + this.threshold);
}
- return ((product + this.bias) > this.threshold);
+ return product + this.bias > this.threshold;
}
-
+
/**
* Update the hyperplane by adding delta.
* @param delta the delta to add to the hyperplane vector.
* */
public void addDelta(Vector delta) {
- this.hyperplane = this.hyperplane.plus(delta);
+ this.hyperplane = this.hyperplane.plus(delta);
}
-
+
@Override
public String toString() {
StringBuilder builder = new StringBuilder("Model: ");
@@ -81,7 +82,7 @@
builder.append(" C: ").append(this.bias);
return builder.toString();
}
-
+
/**
* Shift the bias of the model.
* @param factor factor to multiply the bias by.
@@ -89,7 +90,7 @@
public synchronized void shiftBias(double factor) {
this.bias += factor;
}
-
+
/**
* Multiply the weight at index by delta.
* @param index the index of the element to update.
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/LinearTrainer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/LinearTrainer.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/LinearTrainer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/LinearTrainer.java Mon Feb 15 18:17:54 2010
@@ -33,12 +33,12 @@
* Hadoop.
*/
public abstract class LinearTrainer {
-
+
/** Logger for this class. */
private static final Logger LOG = LoggerFactory.getLogger(LinearTrainer.class);
/** The model to train. */
private final LinearModel model;
-
+
/**
* Initialize the trainer. Distance is initialized to cosine distance, all
* weights are represented through a dense vector.
@@ -54,12 +54,12 @@
* initial classification bias.
* */
protected LinearTrainer(int dimension, double threshold,
- double init, double initBias) throws CardinalityException {
+ double init, double initBias) throws CardinalityException {
DenseVector initialWeights = new DenseVector(dimension);
initialWeights.assign(init);
this.model = new LinearModel(initialWeights, initBias, threshold);
}
-
+
/**
* Initializes training. Runs through all data points in the training set and
* updates the weight vector whenever a classification error occurs.
@@ -73,31 +73,31 @@
* of data- and labelset do not match, a CardinalityException is
* thrown
* */
- public void train(Vector labelset, Matrix dataset)
- throws IndexException, CardinalityException, TrainingException {
+ public void train(Vector labelset, Matrix dataset) throws IndexException,
+ CardinalityException,
+ TrainingException {
if (labelset.size() != dataset.size()[1]) {
throw new CardinalityException();
}
-
+
boolean converged = false;
int iteration = 0;
while (!converged) {
if (iteration > 1000) {
- throw new TrainingException(
- "Too many iterations needed to find hyperplane.");
+ throw new TrainingException("Too many iterations needed to find hyperplane.");
}
-
+
converged = true;
int columnCount = dataset.size()[1];
for (int i = 0; i < columnCount; i++) {
Vector dataPoint = dataset.getColumn(i);
- LOG.debug("Training point: " + dataPoint);
-
+ LinearTrainer.LOG.debug("Training point: " + dataPoint);
+
synchronized (this.model) {
boolean prediction = model.classify(dataPoint);
double label = labelset.get(i);
- if ((label <= 0 && prediction) || (label > 0 && !prediction)) {
- LOG.debug("updating");
+ if (label <= 0 && prediction || label > 0 && !prediction) {
+ LinearTrainer.LOG.debug("updating");
converged = false;
update(label, dataPoint, this.model);
}
@@ -105,14 +105,14 @@
}
}
}
-
+
/**
* Retrieves the trained model if called after train, otherwise the raw model.
* */
public LinearModel getModel() {
return this.model;
}
-
+
/**
* Implement this method to match your training strategy.
*
@@ -123,7 +123,6 @@
* @param dataPoint
* the data point that was classified incorrectly.
* */
- protected abstract void update(double label, Vector dataPoint,
- LinearModel model);
-
+ protected abstract void update(double label, Vector dataPoint, LinearModel model);
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/PerceptronTrainer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/PerceptronTrainer.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/PerceptronTrainer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/PerceptronTrainer.java Mon Feb 15 18:17:54 2010
@@ -25,13 +25,13 @@
* Implements training according to the perceptron update rule.
* */
public class PerceptronTrainer extends LinearTrainer {
-
+
/** Logger for this class. */
private static final Logger LOG = LoggerFactory
- .getLogger(PerceptronTrainer.class);
+ .getLogger(PerceptronTrainer.class);
/** Rate the model is to be updated with at each step. */
private final double learningRate;
-
+
/**
* {@inheritDoc}
*
@@ -39,11 +39,11 @@
* rate to update the model with at each step.
* */
public PerceptronTrainer(int dimension, double threshold,
- double learningRate, double init, double initBias) throws CardinalityException {
+ double learningRate, double init, double initBias) throws CardinalityException {
super(dimension, threshold, init, initBias);
this.learningRate = learningRate;
}
-
+
/**
* {@inheritDoc} Perceptron update works such that in case the predicted label
* does not match the real label, the weight vector is updated as follows: In
@@ -55,17 +55,17 @@
* */
@Override
protected void update(double label, Vector dataPoint,
- LinearModel model) {
+ LinearModel model) {
double factor = 1.0;
if (label == 0.0) {
factor = -1.0;
}
-
+
Vector updateVector = dataPoint.times(factor).times(this.learningRate);
- LOG.debug("Updatevec: " + updateVector);
-
+ PerceptronTrainer.LOG.debug("Updatevec: " + updateVector);
+
model.addDelta(updateVector);
model.shiftBias(factor * this.learningRate);
- LOG.debug(model.toString());
+ PerceptronTrainer.LOG.debug(model.toString());
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/TrainingException.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/TrainingException.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/TrainingException.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/TrainingException.java Mon Feb 15 18:17:54 2010
@@ -8,7 +8,7 @@
public class TrainingException extends Exception {
/** Serialization id. */
private static final long serialVersionUID = 388611231310145397L;
-
+
/**
* Init with message string describing the cause of the exception.
* */
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java Mon Feb 15 18:17:54 2010
@@ -28,23 +28,22 @@
* This class implements training according to the winnow update algorithm.
*/
public class WinnowTrainer extends LinearTrainer {
-
+
private static final Logger log = LoggerFactory.getLogger(WinnowTrainer.class);
-
+
/** Promotion step to multiply weights with on update. */
private final double promotionStep;
-
+
public WinnowTrainer(int dimension, double promotionStep,
- double threshold, double init, double initBias) {
+ double threshold, double init, double initBias) {
super(dimension, threshold, init, initBias);
this.promotionStep = promotionStep;
}
-
- public WinnowTrainer(int dimension, double promotionStep)
- throws CardinalityException {
+
+ public WinnowTrainer(int dimension, double promotionStep) throws CardinalityException {
this(dimension, promotionStep, 0.5, 1, 0);
}
-
+
/**
* Initializes with dimension and promotionStep of 2.
*
@@ -54,7 +53,7 @@
public WinnowTrainer(int dimension) {
this(dimension, 2);
}
-
+
/**
* {@inheritDoc} Winnow update works such that in case the predicted label
* does not match the real label, the weight vector is updated as follows: In
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java Mon Feb 15 18:17:54 2010
@@ -32,6 +32,8 @@
private int nextCanopyId;
+ private int numVectors;
+
// the T1 distance threshold
private double t1;
@@ -108,9 +110,10 @@
pointStronglyBound = pointStronglyBound || (dist < t2);
}
if (!pointStronglyBound) {
- reporter.setStatus("Created new Canopy:" + nextCanopyId);
+ reporter.setStatus("Created new Canopy:" + nextCanopyId + " numPoints:" + numVectors);
canopies.add(new Canopy(point, nextCanopyId++));
}
+ numVectors++;
}
/**
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java Mon Feb 15 18:17:54 2010
@@ -111,10 +111,10 @@
double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
- CanopyClusteringJob.runJob(input, output, measureClass, t1, t2);
+ runJob(input, output, measureClass, t1, t2);
} catch (OptionException e) {
- CanopyClusteringJob.log.error("Exception", e);
+ log.error("Exception", e);
CommandLineUtil.printHelp(group);
}
}
@@ -133,10 +133,11 @@
* @param t2
* the T2 distance threshold
*/
- public static void runJob(String input, String output, String measureClassName, double t1, double t2) throws IOException {
- CanopyDriver.runJob(input, output + CanopyClusteringJob.DEFAULT_CANOPIES_OUTPUT_DIRECTORY,
+ public static void runJob(String input, String output,
+ String measureClassName, double t1, double t2) throws IOException {
+ CanopyDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY,
measureClassName, t1, t2);
- ClusterDriver.runJob(input, output + CanopyClusteringJob.DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output,
+ ClusterDriver.runJob(input, output + DEFAULT_CANOPIES_OUTPUT_DIRECTORY, output,
measureClassName, t1, t2);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Mon Feb 15 18:17:54 2010
@@ -47,7 +47,7 @@
private static final Logger log = LoggerFactory.getLogger(CanopyDriver.class);
- private CanopyDriver() {}
+ private CanopyDriver() { }
public static void main(String[] args) throws IOException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
@@ -109,7 +109,7 @@
CanopyDriver.runJob(input, output, measureClass, t1, t2);
} catch (OptionException e) {
- CanopyDriver.log.error("Exception", e);
+ log.error("Exception", e);
CommandLineUtil.printHelp(group);
}
@@ -131,7 +131,7 @@
*/
public static void runJob(String input, String output,
String measureClassName, double t1, double t2) throws IOException {
- CanopyDriver.log.info("Input: {} Out: {} "
+ log.info("Input: {} Out: {} "
+ "Measure: {} t1: {} t2: {}", new Object[] {input, output, measureClassName, t1, t2});
Configurable client = new JobClient();
JobConf conf = new JobConf(CanopyDriver.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java Mon Feb 15 18:17:54 2010
@@ -114,10 +114,10 @@
double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
- ClusterDriver.runJob(points, canopies, output, measureClass, t1, t2);
+ runJob(points, canopies, output, measureClass, t1, t2);
} catch (OptionException e) {
- ClusterDriver.log.error("Exception", e);
+ log.error("Exception", e);
CommandLineUtil.printHelp(group);
}
@@ -163,7 +163,7 @@
conf.setOutputFormat(SequenceFileOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(points));
- Path outPath = new Path(output + ClusterDriver.DEFAULT_CLUSTER_OUTPUT_DIRECTORY);
+ Path outPath = new Path(output + DEFAULT_CLUSTER_OUTPUT_DIRECTORY);
FileOutputFormat.setOutputPath(conf, outPath);
conf.setMapperClass(ClusterMapper.class);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Mon Feb 15 18:17:54 2010
@@ -120,10 +120,10 @@
int numModels = Integer.parseInt(cmdLine.getValue(topicsOpt).toString());
int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
double alpha_0 = Double.parseDouble(cmdLine.getValue(mOpt).toString());
- DirichletDriver.runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels,
+ runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels,
maxIterations, alpha_0, numReducers);
} catch (OptionException e) {
- DirichletDriver.log.error("Exception parsing command line: ", e);
+ log.error("Exception parsing command line: ", e);
CommandLineUtil.printHelp(group);
}
}
@@ -161,7 +161,7 @@
SecurityException,
NoSuchMethodException,
InvocationTargetException {
- DirichletDriver.runJob(input, output, modelFactory, "org.apache.mahout.math.DenseVector", 2, numClusters,
+ runJob(input, output, modelFactory, "org.apache.mahout.math.DenseVector", 2, numClusters,
maxIterations, alpha_0, numReducers);
}
@@ -200,14 +200,14 @@
InvocationTargetException {
String stateIn = output + "/state-0";
- DirichletDriver.writeInitialState(output, stateIn, modelFactory, modelPrototype, prototypeSize,
+ writeInitialState(output, stateIn, modelFactory, modelPrototype, prototypeSize,
numClusters, alpha_0);
for (int iteration = 0; iteration < maxIterations; iteration++) {
- DirichletDriver.log.info("Iteration {}", iteration);
+ log.info("Iteration {}", iteration);
// point the output to a new directory per iteration
String stateOut = output + "/state-" + (iteration + 1);
- DirichletDriver.runIteration(input, stateIn, stateOut, modelFactory, modelPrototype, prototypeSize,
+ runIteration(input, stateIn, stateOut, modelFactory, modelPrototype, prototypeSize,
numClusters, alpha_0, numReducers);
// now point the input to the old output directory
stateIn = stateOut;
@@ -228,7 +228,7 @@
NoSuchMethodException,
InvocationTargetException {
- DirichletState<VectorWritable> state = DirichletDriver.createState(modelFactory, modelPrototype,
+ DirichletState<VectorWritable> state = createState(modelFactory, modelPrototype,
prototypeSize, numModels, alpha_0);
JobConf job = new JobConf(KMeansDriver.class);
Path outPath = new Path(output);
@@ -329,18 +329,18 @@
conf.setNumReduceTasks(numReducers);
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
- conf.set(DirichletDriver.STATE_IN_KEY, stateIn);
- conf.set(DirichletDriver.MODEL_FACTORY_KEY, modelFactory);
- conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, modelPrototype);
- conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, Integer.toString(prototypeSize));
- conf.set(DirichletDriver.NUM_CLUSTERS_KEY, Integer.toString(numClusters));
- conf.set(DirichletDriver.ALPHA_0_KEY, Double.toString(alpha_0));
+ conf.set(STATE_IN_KEY, stateIn);
+ conf.set(MODEL_FACTORY_KEY, modelFactory);
+ conf.set(MODEL_PROTOTYPE_KEY, modelPrototype);
+ conf.set(PROTOTYPE_SIZE_KEY, Integer.toString(prototypeSize));
+ conf.set(NUM_CLUSTERS_KEY, Integer.toString(numClusters));
+ conf.set(ALPHA_0_KEY, Double.toString(alpha_0));
client.setConf(conf);
try {
JobClient.runJob(conf);
} catch (IOException e) {
- DirichletDriver.log.warn(e.toString(), e);
+ log.warn(e.toString(), e);
}
}
@@ -372,7 +372,7 @@
try {
JobClient.runJob(conf);
} catch (IOException e) {
- DirichletDriver.log.warn(e.toString(), e);
+ log.warn(e.toString(), e);
}
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java?rev=910282&r1=910281&r2=910282&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java Mon Feb 15 18:17:54 2010
@@ -94,7 +94,7 @@
DirichletJob.runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels,
maxIterations, alpha_0);
} catch (OptionException e) {
- DirichletJob.log.error("Exception parsing command line: ", e);
+ log.error("Exception parsing command line: ", e);
CommandLineUtil.printHelp(group);
}