You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/05/19 17:25:25 UTC
svn commit: r946219 - in /mahout/trunk/utils/src/main/java/org/apache/mahout:
clustering/lda/LDAPrintTopics.java utils/clustering/ClusterDumper.java
Author: jeastman
Date: Wed May 19 15:25:25 2010
New Revision: 946219
URL: http://svn.apache.org/viewvc?rev=946219&view=rev
Log:
Fixed the input file filters to avoid reading the Hadoop _logs directory. Both tools (LDAPrintTopics and ClusterDumper) now work on Hadoop.
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=946219&r1=946218&r2=946219&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Wed May 19 15:25:25 2010
@@ -201,7 +201,7 @@ public class LDAPrintTopics {
IntPairWritable key = new IntPairWritable();
DoubleWritable value = new DoubleWritable();
- for (FileStatus status : fs.globStatus(new Path(dir, "*"))) {
+ for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
Path path = status.getPath();
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
while (reader.next(key, value)) {
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=946219&r1=946218&r2=946219&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Wed May 19 15:25:25 2010
@@ -120,13 +120,7 @@ public final class ClusterDumper {
Writer writer = this.outputFile == null ? new OutputStreamWriter(System.out) : new FileWriter(this.outputFile);
FileSystem fs = seqFileDir.getFileSystem(conf);
- FileStatus[] seqFileList = fs.listStatus(seqFileDir, new PathFilter() {
- @Override
- public boolean accept(Path path) {
- return !path.getName().endsWith(".crc");
- }
- });
- for (FileStatus seqFile : seqFileList) {
+ for (FileStatus seqFile : fs.globStatus(new Path(seqFileDir, "part-*"))) {
Path path = seqFile.getPath();
System.out.println("Input Path: " + path);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);