You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by je...@apache.org on 2010/05/19 17:25:25 UTC

svn commit: r946219 - in /mahout/trunk/utils/src/main/java/org/apache/mahout: clustering/lda/LDAPrintTopics.java utils/clustering/ClusterDumper.java

Author: jeastman
Date: Wed May 19 15:25:25 2010
New Revision: 946219

URL: http://svn.apache.org/viewvc?rev=946219&view=rev
Log:
Fixed the input file filters to avoid reading the Hadoop _logs directory. Both LDAPrintTopics and ClusterDumper now work on Hadoop.

Modified:
    mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=946219&r1=946218&r2=946219&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Wed May 19 15:25:25 2010
@@ -201,7 +201,7 @@ public class LDAPrintTopics {
     
     IntPairWritable key = new IntPairWritable();
     DoubleWritable value = new DoubleWritable();
-    for (FileStatus status : fs.globStatus(new Path(dir, "*"))) {
+    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
       Path path = status.getPath();
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
       while (reader.next(key, value)) {

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=946219&r1=946218&r2=946219&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Wed May 19 15:25:25 2010
@@ -120,13 +120,7 @@ public final class ClusterDumper {
     Writer writer = this.outputFile == null ? new OutputStreamWriter(System.out) : new FileWriter(this.outputFile);
 
     FileSystem fs = seqFileDir.getFileSystem(conf);
-    FileStatus[] seqFileList = fs.listStatus(seqFileDir, new PathFilter() {
-      @Override
-      public boolean accept(Path path) {
-        return !path.getName().endsWith(".crc");
-      }
-    });
-    for (FileStatus seqFile : seqFileList) {
+    for (FileStatus seqFile : fs.globStatus(new Path(seqFileDir, "part-*"))) {
       Path path = seqFile.getPath();
       System.out.println("Input Path: " + path);
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);