You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2010/01/10 17:28:01 UTC
svn commit: r897654 -
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
Author: gsingers
Date: Sun Jan 10 16:28:01 2010
New Revision: 897654
URL: http://svn.apache.org/viewvc?rev=897654&view=rev
Log:
doc improvement
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=897654&r1=897653&r2=897654&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Sun Jan 10 16:28:01 2010
@@ -62,10 +62,10 @@
this.score = score;
this.word = word;
}
-
+
@Override
public int compareTo(StringDoublePair other) {
- return Double.compare(score,other.score);
+ return Double.compare(score, other.score);
}
@Override
@@ -85,21 +85,21 @@
}
public static List<List<String>> topWordsForTopics(String dir, Configuration job,
- List<String> wordList, int numWordsToPrint) throws IOException {
+ List<String> wordList, int numWordsToPrint) throws IOException {
FileSystem fs = new Path(dir).getFileSystem(job);
List<PriorityQueue<StringDoublePair>> queues = new ArrayList<PriorityQueue<StringDoublePair>>();
IntPairWritable key = new IntPairWritable();
DoubleWritable value = new DoubleWritable();
- for (FileStatus status : fs.globStatus(new Path(dir, "*"))) {
+ for (FileStatus status : fs.globStatus(new Path(dir, "*"))) {
Path path = status.getPath();
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
while (reader.next(key, value)) {
int topic = key.getX();
int word = key.getY();
- ensureQueueSize(queues,topic);
+ ensureQueueSize(queues, topic);
if (word >= 0 && topic >= 0) {
double score = value.get();
String realWord = wordList.get(word);
@@ -111,9 +111,9 @@
List<List<String>> result = new ArrayList<List<String>>();
for (int i = 0; i < queues.size(); ++i) {
- result.add(i,new LinkedList<String>());
- for (StringDoublePair sdp: queues.get(i)) {
- result.get(i).add(0,sdp.word); // prepend
+ result.add(i, new LinkedList<String>());
+ for (StringDoublePair sdp : queues.get(i)) {
+ result.get(i).add(0, sdp.word); // prepend
}
}
@@ -129,13 +129,13 @@
// Adds the word if the queue is below capacity, or the score is high enough
private static void maybeEnqueue(Queue<StringDoublePair> q, String word,
- double score, int numWordsToPrint) {
+ double score, int numWordsToPrint) {
if (q.size() >= numWordsToPrint && score > q.peek().score) {
q.poll();
}
if (q.size() < numWordsToPrint) {
- q.add(new StringDoublePair(score,word));
- }
+ q.add(new StringDoublePair(score, word));
+ }
}
// Reads in the dictionary created by the vector Driver in util
@@ -167,25 +167,25 @@
GroupBuilder gbuilder = new GroupBuilder();
Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
- abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
- "Path to an LDA output (a state)").withShortName("i").create();
+ abuilder.withName("input").withMinimum(1).withMaximum(1).create()).withDescription(
+ "Path to an LDA output (a state)").withShortName("i").create();
Option dictOpt = obuilder.withLongName("dict").withRequired(true).withArgument(
- abuilder.withName("dict").withMinimum(1).withMaximum(1).create()).withDescription(
- "Dictionary to read in, created by common.vector.Driver").withShortName("d").create();
+ abuilder.withName("dict").withMinimum(1).withMaximum(1).create()).withDescription(
+ "Dictionary to read in, in the same format as one created by org.apache.mahout.utils.vectors.lucene.Driver").withShortName("d").create();
Option outOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "Output directory to write top words").withShortName("o").create();
+ abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
+ "Output directory to write top words").withShortName("o").create();
Option wordOpt = obuilder.withLongName("words").withRequired(true).withArgument(
- abuilder.withName("words").withMinimum(0).withMaximum(1).withDefault("20").create()).withDescription(
- "Number of words to print").withShortName("w").create();
+ abuilder.withName("words").withMinimum(0).withMaximum(1).withDefault("20").create()).withDescription(
+ "Number of words to print").withShortName("w").create();
Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();
Group group = gbuilder.withName("Options").withOption(dictOpt).withOption(outOpt).withOption(
- wordOpt).withOption(inputOpt).create();
+ wordOpt).withOption(inputOpt).create();
try {
Parser parser = new Parser();
parser.setGroup(group);
@@ -196,9 +196,9 @@
return;
}
- String input = cmdLine.getValue(inputOpt).toString();
+ String input = cmdLine.getValue(inputOpt).toString();
File output = new File(cmdLine.getValue(outOpt).toString());
- File dict = new File(cmdLine.getValue(dictOpt).toString());
+ File dict = new File(cmdLine.getValue(dictOpt).toString());
int numWords = 20;
if (cmdLine.hasOption(wordOpt)) {
numWords = Integer.parseInt(cmdLine.getValue(wordOpt).toString());
@@ -209,7 +209,7 @@
Configuration config = new Configuration();
List<List<String>> topWords = topWordsForTopics(input, config, wordList, numWords);
- if(!output.exists()) {
+ if (!output.exists()) {
if (!output.mkdirs()) {
throw new IOException("Could not create directory: " + output);
}
@@ -217,11 +217,11 @@
for (int i = 0; i < topWords.size(); ++i) {
List<String> topK = topWords.get(i);
- File out = new File(output,"topic-"+i);
+ File out = new File(output, "topic-" + i);
PrintWriter writer = new PrintWriter(new FileWriter(out));
writer.println("Topic " + i);
writer.println("===========");
- for (String word: topK) {
+ for (String word : topK) {
writer.println(word);
}
writer.close();