You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/03/25 10:50:23 UTC
svn commit: r1460571 [1/2] - in /mahout/trunk:
examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/
examples/src/main/java/org/apache/mahout/cf/taste/example/email/
examples/src/main/java/org/apache/mahout/cf/taste/example/jester/ ex...
Author: ssc
Date: Mon Mar 25 09:50:22 2013
New Revision: 1460571
URL: http://svn.apache.org/r1460571
Log:
increase code quality in examples
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderBuilder.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderBuilder.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecReducer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderBuilder.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java
mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java
mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/AbstractJDBCInMemoryItemSimilarity.java
mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/SQL92JDBCItemSimilarity.java
mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailOptions.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterator.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/AbstractLuceneIterator.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommender.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommender.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommender.java Mon Mar 25 09:50:22 2013
@@ -88,4 +88,4 @@ public final class BookCrossingBooleanRe
return "BookCrossingBooleanRecommender[recommender:" + recommender + ']';
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderBuilder.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderBuilder.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderBuilder.java Mon Mar 25 09:50:22 2013
@@ -29,4 +29,4 @@ final class BookCrossingBooleanRecommend
return new BookCrossingBooleanRecommender(dataModel);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingBooleanRecommenderEvaluatorRunner.java Mon Mar 25 09:50:22 2013
@@ -56,4 +56,4 @@ public final class BookCrossingBooleanRe
log.info(String.valueOf(evaluation));
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java Mon Mar 25 09:50:22 2013
@@ -96,4 +96,4 @@ public final class BookCrossingDataModel
return "BookCrossingDataModel";
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommender.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommender.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommender.java Mon Mar 25 09:50:22 2013
@@ -87,4 +87,4 @@ public final class BookCrossingRecommend
return "BookCrossingRecommender[recommender:" + recommender + ']';
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderBuilder.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderBuilder.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingRecommenderBuilder.java Mon Mar 25 09:50:22 2013
@@ -29,4 +29,4 @@ final class BookCrossingRecommenderBuild
return new BookCrossingRecommender(dataModel);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java Mon Mar 25 09:50:22 2013
@@ -57,11 +57,12 @@ import java.util.concurrent.atomic.Atomi
* Convert the Mail archives (see {@link org.apache.mahout.text.SequenceFilesFromMailArchives}) to a preference
* file that can be consumed by the {@link org.apache.mahout.cf.taste.hadoop.pseudo.RecommenderJob}.
* <p/>
- * This assumes the input is a Sequence File, that the key is: filename/message id and the value is a list (separated by the
- * user's choosing) containing the from email and any references
+ * This assumes the input is a Sequence File, that the key is: filename/message id and the value is a list
+ * (separated by the user's choosing) containing the from email and any references
* <p/>
- * The output is a matrix where either the from or to are the rows (represented as longs) and the columns are the message ids
- * that the user has interacted with (as a VectorWritable). This class currently does not account for thread hijacking.
+ * The output is a matrix where either the from or to are the rows (represented as longs) and the columns are the
+ * message ids that the user has interacted with (as a VectorWritable). This class currently does not account for
+ * thread hijacking.
* <p/>
* It also outputs a side table mapping the row ids to their original and the message ids to the message thread id
*/
@@ -82,11 +83,15 @@ public final class MailToPrefsDriver ext
addOutputOption();
addOption(DefaultOptionCreator.overwriteOption().create());
addOption("chunkSize", "cs", "The size of chunks to write. Default is 100 mb", "100");
- addOption("separator", "sep", "The separator used in the input file to separate to, from, subject. Default is \\n", "\n");
- addOption("from", "f", "The position in the input text (value) where the from email is located, starting from zero (0).", "0");
- addOption("refs", "r", "The position in the input text (value) where the reference ids are located, starting from zero (0).", "1");
- addOption(buildOption("useCounts", "u", "If set, then use the number of times the user has interacted with a thread as an indication of their preference. Otherwise, use boolean preferences.",
- false, false, "true"));
+ addOption("separator", "sep", "The separator used in the input file to separate to, from, subject. Default is \\n",
+ "\n");
+ addOption("from", "f", "The position in the input text (value) where the from email is located, starting from " +
+ "zero (0).", "0");
+ addOption("refs", "r", "The position in the input text (value) where the reference ids are located, " +
+ "starting from zero (0).", "1");
+ addOption(buildOption("useCounts", "u", "If set, then use the number of times the user has interacted with a " +
+ "thread as an indication of their preference. Otherwise, use boolean preferences.", false, false,
+ String.valueOf(true)));
Map<String, List<String>> parsedArgs = parseArguments(args);
Path input = getInputPath();
@@ -106,7 +111,8 @@ public final class MailToPrefsDriver ext
boolean overwrite = hasOption(DefaultOptionCreator.OVERWRITE_OPTION);
// create the dictionary between message ids and longs
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- //TODO: there seems to be a pattern emerging for dictionary creation -- sparse vectors from seq files also has this.
+ //TODO: there seems to be a pattern emerging for dictionary creation
+ // -- sparse vectors from seq files also has this.
Path msgIdsPath = new Path(output, "msgIds");
if (overwrite) {
HadoopUtil.delete(conf, msgIdsPath);
@@ -128,7 +134,8 @@ public final class MailToPrefsDriver ext
return -1;
}
//write out the dictionary at the top level
- msgIdChunks = createDictionaryChunks(msgIdsPath, output, "msgIds-dictionary-", createMsgIdDictionary.getConfiguration(), chunkSize, msgDim);
+ msgIdChunks = createDictionaryChunks(msgIdsPath, output, "msgIds-dictionary-",
+ createMsgIdDictionary.getConfiguration(), chunkSize, msgDim);
}
//create the dictionary between from email addresses and longs
List<Path> fromChunks = null;
@@ -155,12 +162,14 @@ public final class MailToPrefsDriver ext
}
//write out the dictionary at the top level
int[] fromDim = new int[1];
- fromChunks = createDictionaryChunks(fromIdsPath, output, "fromIds-dictionary-", createFromIdDictionary.getConfiguration(), chunkSize, fromDim);
+ fromChunks = createDictionaryChunks(fromIdsPath, output, "fromIds-dictionary-",
+ createFromIdDictionary.getConfiguration(), chunkSize, fromDim);
}
//OK, we have our dictionaries, let's output the real thing we need: <from_id -> <msgId, msgId, msgId, ...>>
if (shouldRunNextPhase(parsedArgs, currentPhase) && fromChunks != null && msgIdChunks != null) {
//Job map
- //may be a way to do this so that we can load the from ids in memory, if they are small enough so that we don't need the double loop
+ //may be a way to do this so that we can load the from ids in memory, if they are small enough so that
+ // we don't need the double loop
log.info("Creating recommendation matrix");
Path vecPath = new Path(output, "recInput");
if (overwrite) {
@@ -181,20 +190,23 @@ public final class MailToPrefsDriver ext
Path out = new Path(vecPath, "tmp-" + i + '-' + j);
DistributedCache.setCacheFiles(new URI[]{fromChunk.toUri(), idChunk.toUri()}, conf);
Job createRecMatrix = prepareJob(input, out, SequenceFileInputFormat.class,
- MailToRecMapper.class, Text.class, LongWritable.class, MailToRecReducer.class, Text.class, NullWritable.class,
- TextOutputFormat.class);
+ MailToRecMapper.class, Text.class, LongWritable.class, MailToRecReducer.class, Text.class,
+ NullWritable.class, TextOutputFormat.class);
createRecMatrix.getConfiguration().set("mapred.output.compress", "false");
boolean succeeded = createRecMatrix.waitForCompletion(true);
if (!succeeded) {
return -1;
}
//copy the results up a level
- //HadoopUtil.copyMergeSeqFiles(out.getFileSystem(conf), out, vecPath.getFileSystem(conf), outPath, true, conf, "");
- FileStatus[] fs = HadoopUtil.getFileStatus(new Path(out, "*"), PathType.GLOB, PathFilters.partFilter(), null, conf);
+ //HadoopUtil.copyMergeSeqFiles(out.getFileSystem(conf), out, vecPath.getFileSystem(conf), outPath, true,
+ // conf, "");
+ FileStatus[] fs = HadoopUtil.getFileStatus(new Path(out, "*"), PathType.GLOB, PathFilters.partFilter(), null,
+ conf);
for (int k = 0; k < fs.length; k++) {
FileStatus f = fs[k];
Path outPath = new Path(vecPath, "chunk-" + i + '-' + j + '-' + k);
- FileUtil.copy(f.getPath().getFileSystem(conf), f.getPath(), outPath.getFileSystem(conf), outPath, true, overwrite, conf);
+ FileUtil.copy(f.getPath().getFileSystem(conf), f.getPath(), outPath.getFileSystem(conf), outPath, true,
+ overwrite, conf);
}
HadoopUtil.delete(conf, out);
j++;
@@ -207,7 +219,8 @@ public final class MailToPrefsDriver ext
HadoopUtil.delete(conf, mergePath);
}
log.info("Merging together output vectors to vectors.dat in {}", output);*/
- //HadoopUtil.copyMergeSeqFiles(vecPath.getFileSystem(conf), vecPath, mergePath.getFileSystem(conf), mergePath, false, conf, "\n");
+ //HadoopUtil.copyMergeSeqFiles(vecPath.getFileSystem(conf), vecPath, mergePath.getFileSystem(conf), mergePath,
+ // false, conf, "\n");
}
return 0;
@@ -217,7 +230,8 @@ public final class MailToPrefsDriver ext
Path dictionaryPathBase,
String name,
Configuration baseConf,
- int chunkSizeInMegabytes, int[] maxTermDimension) throws IOException {
+ int chunkSizeInMegabytes, int[] maxTermDimension)
+ throws IOException {
List<Path> chunkPaths = Lists.newArrayList();
Configuration conf = new Configuration(baseConf);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java Mon Mar 25 09:50:22 2013
@@ -80,7 +80,8 @@ public final class MailToRecMapper exten
}
}
}
- if (msgIdKey == Integer.MIN_VALUE) {//we don't have any references, so use the msg id
+ //we don't have any references, so use the msg id
+ if (msgIdKey == Integer.MIN_VALUE) {
//get the msg id and the from and output the associated ids
String keyStr = key.toString();
int idx = keyStr.lastIndexOf('/');
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecReducer.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecReducer.java Mon Mar 25 09:50:22 2013
@@ -24,7 +24,7 @@ import org.apache.hadoop.mapreduce.Reduc
import java.io.IOException;
-public class MailToRecReducer extends Reducer<Text, LongWritable, Text, NullWritable>{
+public class MailToRecReducer extends Reducer<Text, LongWritable, Text, NullWritable> {
//if true, then output weight
private boolean useCounts = true;
/**
@@ -38,7 +38,8 @@ public class MailToRecReducer extends Re
}
@Override
- protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
+ protected void reduce(Text key, Iterable<LongWritable> values, Context context)
+ throws IOException, InterruptedException {
if (useCounts) {
long sum = 0;
for (LongWritable value : values) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java Mon Mar 25 09:50:22 2013
@@ -85,4 +85,4 @@ public final class JesterDataModel exten
userBeingRead++;
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommender.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommender.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommender.java Mon Mar 25 09:50:22 2013
@@ -81,4 +81,4 @@ public final class JesterRecommender imp
return "JesterRecommender[recommender:" + recommender + ']';
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderBuilder.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderBuilder.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderBuilder.java Mon Mar 25 09:50:22 2013
@@ -29,4 +29,4 @@ final class JesterRecommenderBuilder imp
return new JesterRecommender(dataModel);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java Mon Mar 25 09:50:22 2013
@@ -227,4 +227,4 @@ public final class KDDCupDataModel imple
// do nothing
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java Mon Mar 25 09:50:22 2013
@@ -80,4 +80,4 @@ public final class Track1Recommender imp
return "Track1Recommender[recommender:" + recommender + ']';
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java Mon Mar 25 09:50:22 2013
@@ -29,4 +29,4 @@ final class Track1RecommenderBuilder imp
return new Track1Recommender(dataModel);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java Mon Mar 25 09:50:22 2013
@@ -86,4 +86,4 @@ public final class Track2Recommender imp
return "Track1Recommender[recommender:" + recommender + ']';
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java Mon Mar 25 09:50:22 2013
@@ -30,4 +30,4 @@ final class Track2RecommenderBuilder imp
return new Track2Recommender(dataModel, ((KDDCupDataModel) dataModel).getDataFileDirectory());
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java Mon Mar 25 09:50:22 2013
@@ -35,7 +35,9 @@ import java.io.File;
* to the ratings.dat file as argument
*
*/
-public class BatchItemSimilaritiesGroupLens {
+public final class BatchItemSimilaritiesGroupLens {
+
+ private BatchItemSimilaritiesGroupLens() {}
public static void main(String[] args) throws Exception {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java Mon Mar 25 09:50:22 2013
@@ -315,4 +315,4 @@ public class TestForest extends Configur
ToolRunner.run(new Configuration(), new TestForest(), args);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailMapper.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailMapper.java Mon Mar 25 09:50:22 2013
@@ -21,14 +21,14 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.utils.email.MailProcessor;
import java.io.IOException;
import java.util.Locale;
import java.util.regex.Pattern;
/**
- * Convert the labels created by the {@link MailProcessor} to one consumable by the classifiers
+ * Convert the labels created by the {@link org.apache.mahout.utils.email.MailProcessor} to one consumable
+ * by the classifiers
*/
public class PrepEmailMapper extends Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java Mon Mar 25 09:50:22 2013
@@ -24,7 +24,7 @@ import org.apache.mahout.math.VectorWrit
import java.io.IOException;
import java.util.Iterator;
-public class PrepEmailReducer extends Reducer<Text, VectorWritable, Text, VectorWritable>{
+public class PrepEmailReducer extends Reducer<Text, VectorWritable, Text, VectorWritable> {
private long maxItemsPerLabel = 10000;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailVectorsDriver.java Mon Mar 25 09:50:22 2013
@@ -34,7 +34,7 @@ import java.util.Map;
/**
* Convert the labels generated by {@link org.apache.mahout.text.SequenceFilesFromMailArchives} and
- * {@link org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles} to ones consumable by the classifiers. We do this
+ * {@link org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles} to ones consumable by the classifiers. We do this
* here b/c if it is done in the creation of sparse vectors, the Reducer collapses all the vectors.
*/
public class PrepEmailVectorsDriver extends AbstractJob {
@@ -51,8 +51,10 @@ public class PrepEmailVectorsDriver exte
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.overwriteOption().create());
- addOption("maxItemsPerLabel", "mipl", "The maximum number of items per label. Can be useful for making the training sets the same size", String.valueOf(100000));
- addOption(buildOption("useListName", "ul", "Use the name of the list as part of the label. If not set, then just use the project name", false, false, "false"));
+ addOption("maxItemsPerLabel", "mipl", "The maximum number of items per label. Can be useful for making the " +
+ "training sets the same size", String.valueOf(100000));
+ addOption(buildOption("useListName", "ul", "Use the name of the list as part of the label. If not set, then " +
+ "just use the project name", false, false, "false"));
Map<String,List<String>> parsedArgs = parseArguments(args);
if (parsedArgs == null) {
return -1;
@@ -63,8 +65,8 @@ public class PrepEmailVectorsDriver exte
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(getConf(), output);
}
- Job convertJob = prepareJob(input, output, SequenceFileInputFormat.class, PrepEmailMapper.class,
- Text.class, VectorWritable.class, PrepEmailReducer.class, Text.class, VectorWritable.class, SequenceFileOutputFormat.class);
+ Job convertJob = prepareJob(input, output, SequenceFileInputFormat.class, PrepEmailMapper.class, Text.class,
+ VectorWritable.class, PrepEmailReducer.class, Text.class, VectorWritable.class, SequenceFileOutputFormat.class);
convertJob.getConfiguration().set(ITEMS_PER_CLASS, getOption("maxItemsPerLabel"));
convertJob.getConfiguration().set(USE_LIST_NAME, String.valueOf(hasOption("useListName")));
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java Mon Mar 25 09:50:22 2013
@@ -91,7 +91,8 @@ public final class SGDHelper {
return r;
}
- static void analyzeState(SGDInfo info, int leakType, int k, State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best) throws IOException {
+ static void analyzeState(SGDInfo info, int leakType, int k, State<AdaptiveLogisticRegression.Wrapper,
+ CrossFoldLearner> best) throws IOException {
int bump = info.getBumps()[(int) Math.floor(info.getStep()) % info.getBumps().length];
int scale = (int) Math.pow(10, Math.floor(info.getStep() / info.getBumps().length));
double maxBeta;
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java Mon Mar 25 09:50:22 2013
@@ -53,8 +53,7 @@ public final class TestASFEmail {
private String inputFile;
private String modelFile;
- private TestASFEmail() {
- }
+ private TestASFEmail() {}
public static void main(String[] args) throws IOException {
TestASFEmail runner = new TestASFEmail();
@@ -79,8 +78,9 @@ public final class TestASFEmail {
return path.getName().contains("test");
}
};
- SequenceFileDirIterator<Text, VectorWritable> iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST, testFilter,
- null, true, conf);
+ SequenceFileDirIterator<Text, VectorWritable> iter =
+ new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST, testFilter,
+ null, true, conf);
long numItems = 0;
while (iter.hasNext()) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java Mon Mar 25 09:50:22 2013
@@ -86,7 +86,8 @@ public final class TestNewsGroups {
int actual = newsGroups.intern(ng);
NewsgroupHelper helper = new NewsgroupHelper();
- Vector input = helper.encodeFeatureVector(file, actual, 0, overallCounts); //no leak type ensures this is a normal vector
+ //no leak type ensures this is a normal vector
+ Vector input = helper.encodeFeatureVector(file, actual, 0, overallCounts);
Vector result = classifier.classifyFull(input);
int cat = result.maxValueIndex();
double score = result.maxValue();
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java Mon Mar 25 09:50:22 2013
@@ -75,11 +75,11 @@ public final class ValidateAdaptiveLogis
.loadFromFile(new File(modelFile));
CsvRecordFactory csv = lmp.getCsvRecordFactory();
AdaptiveLogisticRegression lr = lmp.createAdaptiveLogisticRegression();
-
- if (lmp.getTargetCategories().size() <=2 ) {
+
+ if (lmp.getTargetCategories().size() <= 2) {
collector = new Auc();
}
-
+
OnlineSummarizer slh = new OnlineSummarizer();
ConfusionMatrix cm = new ConfusionMatrix(lmp.getTargetCategories(), defaultCategory);
@@ -103,10 +103,10 @@ public final class ValidateAdaptiveLogis
int target = csv.processLine(line, v);
double likelihood = learner.logLikelihood(target, v);
double score = learner.classifyFull(v).maxValue();
-
+
slh.add(likelihood);
cm.addInstance(csv.getTargetString(line), csv.getTargetLabel(target));
-
+
if (showScores) {
output.printf(Locale.ENGLISH, "%8d, %.12f, %.13f, %.13f\n", target,
score, learner.logLikelihood(target, v), slh.getMean());
@@ -116,7 +116,7 @@ public final class ValidateAdaptiveLogis
}
line = in.readLine();
}
-
+
output.printf(Locale.ENGLISH,"\nLog-likelihood:");
output.printf(Locale.ENGLISH, "Min=%.2f, Max=%.2f, Mean=%.2f, Median=%.2f\n",
slh.getMin(), slh.getMax(), slh.getMean(), slh.getMedian());
@@ -124,10 +124,10 @@ public final class ValidateAdaptiveLogis
if (collector != null) {
output.printf(Locale.ENGLISH, "\nAUC = %.2f\n", collector.auc());
}
-
+
if (showConfusion) {
output.printf(Locale.ENGLISH, "\n%s\n\n", cm.toString());
-
+
if (collector != null) {
Matrix m = collector.entropy();
output.printf(Locale.ENGLISH,
@@ -135,7 +135,7 @@ public final class ValidateAdaptiveLogis
m.get(1, 0), m.get(0, 1), m.get(1, 1));
}
}
-
+
}
}
@@ -155,7 +155,7 @@ public final class ValidateAdaptiveLogis
Option scores = builder.withLongName("scores")
.withDescription("print scores").create();
-
+
ArgumentBuilder argumentBuilder = new ArgumentBuilder();
Option inputFileOption = builder
.withLongName("input")
@@ -172,14 +172,14 @@ public final class ValidateAdaptiveLogis
argumentBuilder.withName("model").withMaximum(1)
.create())
.withDescription("where to get the trained model").create();
-
+
Option defaultCagetoryOption = builder
- .withLongName("defaultCategory")
- .withRequired(false)
- .withArgument(
- argumentBuilder.withName("defaultCategory").withMaximum(1).withDefault("unknown")
- .create())
- .withDescription("the default category value to use").create();
+ .withLongName("defaultCategory")
+ .withRequired(false)
+ .withArgument(
+ argumentBuilder.withName("defaultCategory").withMaximum(1).withDefault("unknown")
+ .create())
+ .withDescription("the default category value to use").create();
Group normalArgs = new GroupBuilder().withOption(help)
.withOption(quiet).withOption(auc).withOption(scores)
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java Mon Mar 25 09:50:22 2013
@@ -36,11 +36,11 @@ import org.apache.mahout.common.distance
public class DisplaySpectralKMeans extends DisplayClustering {
- protected static final String SAMPLES = "samples";
- protected static final String OUTPUT = "output";
- protected static final String TEMP = "tmp";
- protected static final String AFFINITIES = "affinities";
-
+ protected static final String SAMPLES = "samples";
+ protected static final String OUTPUT = "output";
+ protected static final String TEMP = "tmp";
+ protected static final String AFFINITIES = "affinities";
+
DisplaySpectralKMeans() {
initialize();
setTitle("Spectral k-Means Clusters (>" + (int) (significance * 100) + "% of population)");
@@ -83,6 +83,6 @@ public class DisplaySpectralKMeans exten
@Override
public void paint(Graphics g) {
- plotClusteredSampleData((Graphics2D) g, new Path(OUTPUT));
+ plotClusteredSampleData((Graphics2D) g, new Path(OUTPUT));
}
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Mon Mar 25 09:50:22 2013
@@ -751,7 +751,7 @@ public class VectorBenchmarks {
}
- int numClusters=25;
+ int numClusters = 25;
if (cmdLine.hasOption(numClustersOpt)) {
numClusters = Integer.parseInt((String) cmdLine.getValue(numClustersOpt));
}
@@ -849,4 +849,4 @@ public class VectorBenchmarks {
return sb.toString();
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java Mon Mar 25 09:50:22 2013
@@ -234,7 +234,7 @@ public final class HBaseDataModel implem
SortedMap<byte[],byte[]> families = result.getFamilyMap(ITEMS_CF);
FastIDSet ids = new FastIDSet(families.size());
- for (byte[] family: families.keySet()) {
+ for (byte[] family : families.keySet()) {
ids.add(Bytes.toLong(family));
}
return ids;
@@ -495,7 +495,7 @@ public final class HBaseDataModel implem
// Copy into FastIDSet
FastIDSet userIDs = new FastIDSet(ids.size());
- for(long l : ids) {
+ for (long l : ids) {
userIDs.add(l);
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java Mon Mar 25 09:50:22 2013
@@ -158,4 +158,4 @@ public class MySQLBooleanPrefJDBCDataMod
return Integer.MIN_VALUE;
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java Mon Mar 25 09:50:22 2013
@@ -187,4 +187,4 @@ public final class MySQLJDBCDiffStorage
return Integer.MIN_VALUE;
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/AbstractJDBCInMemoryItemSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/AbstractJDBCInMemoryItemSimilarity.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/AbstractJDBCInMemoryItemSimilarity.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/AbstractJDBCInMemoryItemSimilarity.java Mon Mar 25 09:50:22 2013
@@ -94,7 +94,7 @@ abstract class AbstractJDBCInMemoryItemS
}
}
- private static class JDBCSimilaritiesIterable implements Iterable<GenericItemSimilarity.ItemItemSimilarity> {
+ private static final class JDBCSimilaritiesIterable implements Iterable<GenericItemSimilarity.ItemItemSimilarity> {
private final DataSource dataSource;
private final String getAllItemSimilaritiesSQL;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java Mon Mar 25 09:50:22 2013
@@ -100,4 +100,5 @@ public class MySQLJDBCItemSimilarity ext
return Integer.MIN_VALUE;
}
-}
\ No newline at end of file
+}
+
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/SQL92JDBCItemSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/SQL92JDBCItemSimilarity.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/SQL92JDBCItemSimilarity.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/SQL92JDBCItemSimilarity.java Mon Mar 25 09:50:22 2013
@@ -54,4 +54,4 @@ public class SQL92JDBCItemSimilarity ext
+ itemAIDColumn + "=? OR " + itemBIDColumn + "=?");
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java Mon Mar 25 09:50:22 2013
@@ -64,15 +64,18 @@ public final class ConfusionMatrixDumper
+ "}\n"
+ "th.normalHeader\n"
+ "{\n"
- + "border:1px solid black;border-collapse:collapse;text-align:center;background-color:white\n"
+ + "border:1px solid black;border-collapse:collapse;text-align:center;" +
+ "background-color:white\n"
+ "}\n"
+ "th.tallHeader\n"
+ "{\n"
- + "border:1px solid black;border-collapse:collapse;text-align:center;background-color:white; height:6em\n"
+ + "border:1px solid black;border-collapse:collapse;text-align:center;" +
+ "background-color:white; height:6em\n"
+ "}\n"
+ "tr.label\n"
+ "{\n"
- + "border:1px solid black;border-collapse:collapse;text-align:center;background-color:white\n"
+ + "border:1px solid black;border-collapse:collapse;text-align:center;" +
+ "background-color:white\n"
+ "}\n"
+ "tr.row\n"
+ "{\n"
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java Mon Mar 25 09:50:22 2013
@@ -58,9 +58,12 @@ public final class CDbwEvaluator {
private final List<Cluster> clusters;
private final DistanceMeasure measure;
private Double interClusterDensity = null;
- private Map<Integer,Map<Integer,Double>> minimumDistances = null; // these are symmetric so we only compute half of them
- private Map<Integer,Map<Integer,Double>> interClusterDensities = null; // these are symmetric too
- private Map<Integer,Map<Integer,int[]>> closestRepPointIndices = null; // these are symmetric too
+ // these are symmetric so we only compute half of them
+ private Map<Integer,Map<Integer,Double>> minimumDistances = null;
+ // these are symmetric too
+ private Map<Integer,Map<Integer,Double>> interClusterDensities = null;
+ // these are symmetric too
+ private Map<Integer,Map<Integer,int[]>> closestRepPointIndices = null;
/**
* For testing only
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java Mon Mar 25 09:50:22 2013
@@ -183,7 +183,8 @@ public final class RepresentativePointsD
Map<Integer,List<VectorWritable>> repPoints = RepresentativePointsMapper.getRepresentativePoints(conf, stateIn);
Map<Integer,WeightedVectorWritable> mostDistantPoints = Maps.newHashMap();
FileSystem fs = FileSystem.get(clusteredPointsIn.toUri(), conf);
- for (Pair<IntWritable,WeightedVectorWritable> record : new SequenceFileDirIterable<IntWritable,WeightedVectorWritable>(
+ for (Pair<IntWritable,WeightedVectorWritable> record
+ : new SequenceFileDirIterable<IntWritable,WeightedVectorWritable>(
clusteredPointsIn, PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf)) {
RepresentativePointsMapper.mapPoint(record.getFirst(), record.getSecond(), measure, repPoints, mostDistantPoints);
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Mon Mar 25 09:50:22 2013
@@ -179,7 +179,7 @@ public final class LDAPrintTopics {
}
});
for (Pair<String,Double> wordWithScore : topKasList) {
- out.write(wordWithScore.getFirst() + " [p(" + wordWithScore.getFirst() + "|topic_" + i +") = "
+ out.write(wordWithScore.getFirst() + " [p(" + wordWithScore.getFirst() + "|topic_" + i + ") = "
+ wordWithScore.getSecond());
out.write('\n');
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java Mon Mar 25 09:50:22 2013
@@ -147,7 +147,7 @@ public final class MailArchivesClusterin
if (length >= 2 && length <= 28) {
char[] buf = termAtt.buffer();
int at = 0;
- for (int c=0; c < length; c++) {
+ for (int c = 0; c < length; c++) {
char ch = buf[c];
if (ch != '\'') {
output[at++] = ch;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java Mon Mar 25 09:50:22 2013
@@ -26,7 +26,6 @@ import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
@@ -36,9 +35,9 @@ import org.apache.mahout.utils.io.Chunke
/**
* Converts a directory of text documents into SequenceFiles of Specified chunkSize. This class takes in a
* parent directory containing sub folders of text documents and recursively reads the files and creates the
- * {@link SequenceFile}s of docid => content. The docid is set as the relative path of the document from the
- * parent directory prepended with a specified prefix. You can also specify the input encoding of the text
- * files. The content of the output SequenceFiles are encoded as UTF-8 text.
+ * {@link org.apache.hadoop.io.SequenceFile}s of docid => content. The docid is set as the relative path of the
+ * document from the parent directory prepended with a specified prefix. You can also specify the input encoding
+ * of the text files. The content of the output SequenceFiles are encoded as UTF-8 text.
*/
public class SequenceFilesFromDirectory extends AbstractJob {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java Mon Mar 25 09:50:22 2013
@@ -23,7 +23,6 @@ import org.apache.commons.cli2.builder.A
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
@@ -45,7 +44,7 @@ import java.util.regex.Pattern;
/**
* Converts a directory of gzipped mail archives into SequenceFiles of specified
* chunkSize. This class is similar to {@link SequenceFilesFromDirectory} except
- * it uses block-compressed {@link SequenceFile}s and parses out the subject and
+ * it uses block-compressed {@link org.apache.hadoop.io.SequenceFile}s and parses out the subject and
* body text of each mail message into a separate key/value pair.
*/
public final class SequenceFilesFromMailArchives extends AbstractJob {
@@ -166,7 +165,8 @@ public final class SequenceFilesFromMail
obuilder.withLongName("quotedRegex")
.withRequired(false).withArgument(abuilder.withName("regex")
.withMinimum(1).withMaximum(1).create()).withDescription(
- "Specify the regex that identifies quoted text. Default is to look for > or | at the beginning of the line.")
+ "Specify the regex that identifies quoted text. " +
+ "Default is to look for > or | at the beginning of the line.")
.withShortName("q").create());
addOption(
obuilder.withLongName("separator")
@@ -179,7 +179,8 @@ public final class SequenceFilesFromMail
obuilder.withLongName("bodySeparator")
.withRequired(false).withArgument(abuilder.withName("bodySeparator")
.withMinimum(1).withMaximum(1).create()).withDescription(
- "The separator to use between lines in the body. Default is \\n. Useful to change if you wish to have the message be on one line")
+ "The separator to use between lines in the body. Default is \\n. " +
+ "Useful to change if you wish to have the message be on one line")
.withShortName("bodySep").create());
addOption(DefaultOptionCreator.helpOption());
Map<String, List<String>> parsedArgs = parseArguments(args);
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java Mon Mar 25 09:50:22 2013
@@ -48,4 +48,4 @@ public class WikipediaAnalyzer extends S
result = new StopFilter(Version.LUCENE_41, result, getStopwordSet());
return new TokenStreamComponents(tokenizer, result);
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Mon Mar 25 09:50:22 2013
@@ -52,7 +52,8 @@ public final class SequenceFileDumper ex
addOption("substring", "b", "The number of chars to print out per value", false);
addOption(buildOption("count", "c", "Report the count only", false, false, null));
addOption("numItems", "n", "Output at most <n> key value pairs", false);
- addOption(buildOption("facets", "fa", "Output the counts per key. Note, if there are a lot of unique keys, this can take up a fair amount of memory", false, false, null));
+ addOption(buildOption("facets", "fa", "Output the counts per key. Note, if there are a lot of unique keys, " +
+ "this can take up a fair amount of memory", false, false, null));
addOption(buildOption("quiet", "q", "Print only file contents.", false, false, null));
if (parseArguments(args, false, true) == null) {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java Mon Mar 25 09:50:22 2013
@@ -156,15 +156,20 @@ public class SplitInput extends Abstract
addOption("testOutput", "te", "The test data output directory", false);
addOption("testSplitSize", "ss", "The number of documents held back as test data for each category", false);
addOption("testSplitPct", "sp", "The % of documents held back as test data for each category", false);
- addOption("splitLocation", "sl", "Location for start of test data expressed as a percentage of the input file size (0=start, 50=middle, 100=end", false);
+ addOption("splitLocation", "sl", "Location for start of test data expressed as a percentage of the input file " +
+ "size (0=start, 50=middle, 100=end", false);
addOption("randomSelectionSize", "rs", "The number of items to be randomly selected as test data ", false);
- addOption("randomSelectionPct", "rp", "Percentage of items to be randomly selected as test data when using mapreduce mode", false);
- addOption("charset", "c", "The name of the character encoding of the input files (not needed if using SequenceFiles)", false);
- addOption(buildOption("sequenceFiles", "seq", "Set if the input files are sequence files. Default is false", false, false, "false"));
+ addOption("randomSelectionPct", "rp", "Percentage of items to be randomly selected as test data when using " +
+ "mapreduce mode", false);
+ addOption("charset", "c", "The name of the character encoding of the input files (not needed if using " +
+ "SequenceFiles)", false);
+ addOption(buildOption("sequenceFiles", "seq", "Set if the input files are sequence files. Default is false",
+ false, false, "false"));
addOption(DefaultOptionCreator.methodOption().create());
addOption(DefaultOptionCreator.overwriteOption().create());
//TODO: extend this to sequential mode
- addOption("keepPct", "k", "The percentage of total data to keep in map-reduce mode, the rest will be ignored. Default is 100%", false);
+ addOption("keepPct", "k", "The percentage of total data to keep in map-reduce mode, the rest will be ignored. " +
+ "Default is 100%", false);
addOption("mapRedOutputDir", "mro", "Output directory for map reduce jobs", false);
if (parseArguments(args) == null) {
@@ -222,7 +227,8 @@ public class SplitInput extends Abstract
}
if (hasOption("testSplitSize") && hasOption("testSplitPct")) {
- throw new OptionException(getCLIOption("testSplitPct"), "must have either split size or split percentage option, not BOTH");
+ throw new OptionException(getCLIOption("testSplitPct"), "must have either split size or split percentage " +
+ "option, not BOTH");
}
if (hasOption("testSplitSize")) {
@@ -413,8 +419,10 @@ public class SplitInput extends Abstract
} else {
SequenceFileIterator<Writable, Writable> iterator =
new SequenceFileIterator<Writable, Writable>(inputFile, false, fs.getConf());
- SequenceFile.Writer trainingWriter = SequenceFile.createWriter(fs, fs.getConf(), trainingOutputFile, iterator.getKeyClass(), iterator.getValueClass());
- SequenceFile.Writer testWriter = SequenceFile.createWriter(fs, fs.getConf(), testOutputFile, iterator.getKeyClass(), iterator.getValueClass());
+ SequenceFile.Writer trainingWriter = SequenceFile.createWriter(fs, fs.getConf(), trainingOutputFile,
+ iterator.getKeyClass(), iterator.getValueClass());
+ SequenceFile.Writer testWriter = SequenceFile.createWriter(fs, fs.getConf(), testOutputFile,
+ iterator.getKeyClass(), iterator.getValueClass());
try {
int pos = 0;
@@ -617,7 +625,8 @@ public class SplitInput extends Abstract
|| testRandomSelectionPct == -1,
"Invalid testRandomSelectionPct percentage", testRandomSelectionPct);
- Preconditions.checkArgument(trainingOutputDirectory != null || useMapRed, "No training output directory was specified");
+ Preconditions.checkArgument(trainingOutputDirectory != null || useMapRed,
+ "No training output directory was specified");
Preconditions.checkArgument(testOutputDirectory != null || useMapRed, "No test output directory was specified");
// only one of the following may be set, one must be set.
@@ -635,8 +644,8 @@ public class SplitInput extends Abstract
count++;
}
- Preconditions.checkArgument(count == 1,
- "Exactly one of testSplitSize, testSplitPct, testRandomSelectionSize, testRandomSelectionPct should be set");
+ Preconditions.checkArgument(count == 1, "Exactly one of testSplitSize, testSplitPct, testRandomSelectionSize, " +
+ "testRandomSelectionPct should be set");
if (!useMapRed) {
Configuration conf = getConf();
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java Mon Mar 25 09:50:22 2013
@@ -52,10 +52,13 @@ public abstract class AbstractClusterWri
/**
*
* @param writer The underlying {@link java.io.Writer} to use
- * @param clusterIdToPoints The map between cluster ids {@link org.apache.mahout.clustering.Cluster#getId()} and the points in the cluster
- * @param measure The {@link org.apache.mahout.common.distance.DistanceMeasure} used to calculate the distance. Some writers may wish to use it for calculating weights for display. May be null.
+ * @param clusterIdToPoints The map between cluster ids {@link org.apache.mahout.clustering.Cluster#getId()} and the
+ * points in the cluster
+ * @param measure The {@link org.apache.mahout.common.distance.DistanceMeasure} used to calculate the distance.
+ * Some writers may wish to use it for calculating weights for display. May be null.
*/
- protected AbstractClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints, DistanceMeasure measure) {
+ protected AbstractClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints,
+ DistanceMeasure measure) {
this.writer = writer;
this.clusterIdToPoints = clusterIdToPoints;
this.measure = measure;
@@ -181,4 +184,4 @@ public abstract class AbstractClusterWri
this.weight = weight;
}
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java Mon Mar 25 09:50:22 2013
@@ -39,7 +39,8 @@ public class CSVClusterWriter extends Ab
private static final Pattern VEC_PATTERN = Pattern.compile("\\{|\\:|\\,|\\}");
- public CSVClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints, DistanceMeasure measure) {
+ public CSVClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints,
+ DistanceMeasure measure) {
super(writer, clusterIdToPoints, measure);
}
@@ -47,7 +48,7 @@ public class CSVClusterWriter extends Ab
public void write(ClusterWritable clusterWritable) throws IOException {
StringBuilder line = new StringBuilder();
Cluster cluster = clusterWritable.getValue();
- line.append(cluster.getId());
+ line.append(cluster.getId());
List<WeightedVectorWritable> points = getClusterIdToPoints().get(cluster.getId());
if (points != null) {
for (WeightedVectorWritable point : points) {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Mon Mar 25 09:50:22 2013
@@ -65,13 +65,11 @@ public final class ClusterDumper extends
GRAPH_ML,
}
- public static final String OUTPUT_OPTION = "output";
public static final String DICTIONARY_TYPE_OPTION = "dictionaryType";
public static final String DICTIONARY_OPTION = "dictionary";
public static final String POINTS_DIR_OPTION = "pointsDir";
public static final String NUM_WORDS_OPTION = "numWords";
public static final String SUBSTRING_OPTION = "substring";
- public static final String SEQ_FILE_DIR_OPTION = "seqFileDir";
public static final String EVALUATE_CLUSTERS = "evaluate";
public static final String OUTPUT_FORMAT_OPT = "outputFormat";
@@ -106,16 +104,19 @@ public final class ClusterDumper extends
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
- addOption(OUTPUT_FORMAT_OPT, "of", "The optional output format for the results. Options: TEXT, CSV or GRAPH_ML", "TEXT");
+ addOption(OUTPUT_FORMAT_OPT, "of", "The optional output format for the results. Options: TEXT, CSV or GRAPH_ML",
+ "TEXT");
addOption(SUBSTRING_OPTION, "b", "The number of chars of the asFormatString() to print");
addOption(NUM_WORDS_OPTION, "n", "The number of top terms to print");
addOption(POINTS_DIR_OPTION, "p",
"The directory containing points sequence files mapping input vectors to their cluster. "
+ "If specified, then the program will output the points associated with a cluster");
- addOption(SAMPLE_POINTS, "sp", "Specifies the maximum number of points to include _per_ cluster. The default is to include all points");
+ addOption(SAMPLE_POINTS, "sp", "Specifies the maximum number of points to include _per_ cluster. The default " +
+ "is to include all points");
addOption(DICTIONARY_OPTION, "d", "The dictionary file");
addOption(DICTIONARY_TYPE_OPTION, "dt", "The dictionary file type (text|sequencefile)", "text");
- addOption(buildOption(EVALUATE_CLUSTERS, "e", "Run ClusterEvaluator and CDbwEvaluator over the input. The output will be appended to the rest of the output at the end.", false, false, null));
+ addOption(buildOption(EVALUATE_CLUSTERS, "e", "Run ClusterEvaluator and CDbwEvaluator over the input. " +
+ "The output will be appended to the rest of the output at the end.", false, false, null));
addOption(DefaultOptionCreator.distanceMeasureOption().create());
// output is optional, will print to System.out per default
@@ -181,13 +182,14 @@ public final class ClusterDumper extends
FileSystem fs = FileSystem.get(p.toUri(), conf);
writer = new OutputStreamWriter(fs.create(p), Charsets.UTF_8);
} else {
- Files.createParentDirs(outputFile);
+ Files.createParentDirs(outputFile);
writer = Files.newWriter(this.outputFile, Charsets.UTF_8);
}
}
ClusterWriter clusterWriter = createClusterWriter(writer, dictionary);
try {
- long numWritten = clusterWriter.write(new SequenceFileDirValueIterable<ClusterWritable>(new Path(seqFileDir, "part-*"), PathType.GLOB, conf));
+ long numWritten = clusterWriter.write(new SequenceFileDirValueIterable<ClusterWritable>(new Path(seqFileDir,
+ "part-*"), PathType.GLOB, conf));
writer.flush();
if (runEvaluation) {
@@ -225,7 +227,7 @@ public final class ClusterDumper extends
}
ClusterWriter createClusterWriter(Writer writer, String[] dictionary) throws IOException {
- ClusterWriter result = null;
+ ClusterWriter result;
switch (outputFormat) {
case TEXT:
@@ -237,6 +239,8 @@ public final class ClusterDumper extends
case GRAPH_ML:
result = new GraphMLClusterWriter(writer, clusterIdToPoints, measure, numTopFeatures, dictionary, subString);
break;
+ default:
+ throw new IllegalStateException("Unknown outputformat: " + outputFormat);
}
return result;
}
@@ -289,7 +293,8 @@ public final class ClusterDumper extends
this.maxPointsPerCluster = maxPointsPerCluster;
}
- public static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir, long maxPointsPerCluster, Configuration conf) {
+ public static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir, long maxPointsPerCluster,
+ Configuration conf) {
Map<Integer, List<WeightedVectorWritable>> result = new TreeMap<Integer, List<WeightedVectorWritable>>();
for (Pair<IntWritable, WeightedVectorWritable> record :
new SequenceFileDirIterable<IntWritable, WeightedVectorWritable>(
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java Mon Mar 25 09:50:22 2013
@@ -99,7 +99,7 @@ public class GraphMLClusterWriter extend
public void write(ClusterWritable clusterWritable) throws IOException {
StringBuilder line = new StringBuilder();
Cluster cluster = clusterWritable.getValue();
- Color rgb = getColor(cluster.getId());
+ Color rgb = getColor(cluster.getId());
String topTerms = "";
if (dictionary != null) {
@@ -122,7 +122,8 @@ public class GraphMLClusterWriter extend
Vector theVec = point.getVector();
double distance = 1;
if (measure != null) {
- distance = measure.distance(cluster.getCenter().getLengthSquared(), cluster.getCenter(), theVec) * 500; //scale the distance
+ //scale the distance
+ distance = measure.distance(cluster.getCenter().getLengthSquared(), cluster.getCenter(), theVec) * 500;
}
String vecStr;
int angle = random.nextInt(360); //pick an angle at random and then scale along that angle
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailOptions.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailOptions.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailOptions.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailOptions.java Mon Mar 25 09:50:22 2013
@@ -149,7 +149,8 @@ public class MailOptions {
/**
* @see #setStripQuotedText(boolean)
*
- * @param quotedTextPattern The {@link java.util.regex.Pattern} to use to identify lines that are quoted text. Default is | and >
+ * @param quotedTextPattern The {@link java.util.regex.Pattern} to use to identify lines that are quoted text.
+ * Default is | and >
*/
public void setQuotedTextPattern(Pattern quotedTextPattern) {
this.quotedTextPattern = quotedTextPattern;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java Mon Mar 25 09:50:22 2013
@@ -22,6 +22,8 @@ import org.apache.mahout.utils.io.Chunke
import org.apache.mahout.utils.io.ChunkedWrapper;
import org.apache.mahout.utils.io.IOWriterWrapper;
import org.apache.mahout.utils.io.WrappedWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileNotFoundException;
@@ -31,23 +33,22 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MailProcessor {
- private static final Pattern MESSAGE_START =
- Pattern.compile("^From \\S+@\\S.*\\d{4}$", Pattern.CASE_INSENSITIVE);
- private static final Pattern MESSAGE_ID_PREFIX =
- Pattern.compile("^message-id: <(.*)>$", Pattern.CASE_INSENSITIVE);
+
+ private static final Pattern MESSAGE_START = Pattern.compile("^From \\S+@\\S.*\\d{4}$", Pattern.CASE_INSENSITIVE);
+ private static final Pattern MESSAGE_ID_PREFIX = Pattern.compile("^message-id: <(.*)>$", Pattern.CASE_INSENSITIVE);
// regular expressions used to parse individual messages
- public static final Pattern SUBJECT_PREFIX =
- Pattern.compile("^subject: (.*)$", Pattern.CASE_INSENSITIVE);
- public static final Pattern FROM_PREFIX =
- Pattern.compile("^from: (\\S.*)$", Pattern.CASE_INSENSITIVE); //we need to have at least one character
- public static final Pattern REFS_PREFIX =
- Pattern.compile("^references: (.*)$", Pattern.CASE_INSENSITIVE);
- public static final Pattern TO_PREFIX =
- Pattern.compile("^to: (.*)$", Pattern.CASE_INSENSITIVE);
+ public static final Pattern SUBJECT_PREFIX = Pattern.compile("^subject: (.*)$", Pattern.CASE_INSENSITIVE);
+ //we need to have at least one character
+ public static final Pattern FROM_PREFIX = Pattern.compile("^from: (\\S.*)$", Pattern.CASE_INSENSITIVE);
+ public static final Pattern REFS_PREFIX = Pattern.compile("^references: (.*)$", Pattern.CASE_INSENSITIVE);
+ public static final Pattern TO_PREFIX = Pattern.compile("^to: (.*)$", Pattern.CASE_INSENSITIVE);
+
private final String prefix;
private final MailOptions options;
private final WrappedWriter writer;
+ private static final Logger log = LoggerFactory.getLogger(MailProcessor.class);
+
public MailProcessor(MailOptions options, String prefix, Writer writer) {
this.writer = new IOWriterWrapper(writer);
this.options = options;
@@ -134,6 +135,7 @@ public class MailProcessor {
}
} catch (FileNotFoundException e) {
// Skip file.
+ log.warn("Unable to process non-existing file", e);
}
// TODO: report exceptions and continue;
return messageCount;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java?rev=1460571&r1=1460570&r2=1460571&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java Mon Mar 25 09:50:22 2013
@@ -56,15 +56,16 @@ public class RegexMapper extends Mapper<
}
}
- transformer = ClassUtils.instantiateAs(config.get(TRANSFORMER_CLASS, IdentityTransformer.class.getName()), RegexTransformer.class);
+ transformer = ClassUtils.instantiateAs(config.get(TRANSFORMER_CLASS, IdentityTransformer.class.getName()),
+ RegexTransformer.class);
String analyzerName = config.get(ANALYZER_NAME);
if (analyzerName != null && transformer instanceof AnalyzerTransformer) {
Analyzer analyzer = ClassUtils.instantiateAs(analyzerName, Analyzer.class);
((AnalyzerTransformer)transformer).setAnalyzer(analyzer);
}
- formatter = ClassUtils.instantiateAs(config.get(FORMATTER_CLASS, IdentityFormatter.class.getName()), RegexFormatter.class);
-
+ formatter = ClassUtils.instantiateAs(config.get(FORMATTER_CLASS, IdentityFormatter.class.getName()),
+ RegexFormatter.class);
}