You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2013/06/07 21:33:29 UTC

svn commit: r1490793 - in /mahout/trunk: CHANGELOG integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java src/conf/driver.classes.default.props

Author: gsingers
Date: Fri Jun  7 19:33:29 2013
New Revision: 1490793

URL: http://svn.apache.org/r1490793
Log:
MAHOUT-958: fix use with globs, MAHOUT-944: minor tweak to driver.classes

Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
    mahout/trunk/src/conf/driver.classes.default.props

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1490793&r1=1490792&r2=1490793&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Fri Jun  7 19:33:29 2013
@@ -87,4 +87,6 @@ __MAHOUT-1181: Adding StreamingKMeans Ma
 
   MAHOUT-961: Fix issue in decision forest tree visualizer to properly show stems of tree (Ikumasa Mukai via gsingers)
 
-  MAHOUT-944: Create SequenceFiles out of Lucene document storage (no term vectors required) (Frank Scholten, gsingers)
\ No newline at end of file
+  MAHOUT-944: Create SequenceFiles out of Lucene document storage (no term vectors required) (Frank Scholten, gsingers)
+
+  MAHOUT-958: Fix issue with globs in RepresentativePointsDriver (Adam Baron, Vikram Dixit K, ehgjr via gsingers)
\ No newline at end of file

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java?rev=1490793&r1=1490792&r2=1490793&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java Fri Jun  7 19:33:29 2013
@@ -136,20 +136,23 @@ public final class RepresentativePointsD
   private static void writeInitialState(Path output, Path clustersIn) throws IOException {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(output.toUri(), conf);
-    for (FileStatus part : fs.listStatus(clustersIn, PathFilters.logsCRCFilter())) {
-      Path inPart = part.getPath();
-      Path path = new Path(output, inPart.getName());
-      SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
-      try {
-        for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(inPart, true, conf)) {
-          Cluster cluster = clusterWritable.getValue();
-          if (log.isDebugEnabled()) {
-            log.debug("C-{}: {}", cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null));
+    for (FileStatus dir : fs.globStatus(clustersIn)) {
+      Path inPath = dir.getPath();
+      for (FileStatus part : fs.listStatus(inPath, PathFilters.logsCRCFilter())) {
+        Path inPart = part.getPath();
+        Path path = new Path(output, inPart.getName());
+        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
+        try {
+          for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(inPart, true, conf)) {
+            Cluster cluster = clusterWritable.getValue();
+            if (log.isDebugEnabled()) {
+              log.debug("C-{}: {}", cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null));
+            }
+            writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
           }
-          writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
+        } finally {
+          Closeables.close(writer, false);
         }
-      } finally {
-        Closeables.closeQuietly(writer);
       }
     }
   }

Modified: mahout/trunk/src/conf/driver.classes.default.props
URL: http://svn.apache.org/viewvc/mahout/trunk/src/conf/driver.classes.default.props?rev=1490793&r1=1490792&r2=1490793&view=diff
==============================================================================
--- mahout/trunk/src/conf/driver.classes.default.props (original)
+++ mahout/trunk/src/conf/driver.classes.default.props Fri Jun  7 19:33:29 2013
@@ -13,7 +13,7 @@ org.apache.mahout.vectorizer.SparseVecto
 org.apache.mahout.vectorizer.EncodedVectorsFromSequenceFiles = seq2encoded: Encoded Sparse Vector generation from Text sequence files
 org.apache.mahout.text.WikipediaToSequenceFile = seqwiki : Wikipedia xml dump to sequence file
 org.apache.mahout.text.SequenceFilesFromMailArchives = seqmailarchives : Creates SequenceFile from a directory containing gzipped mail archives
-org.apache.mahout.text.LuceneIndexToSequenceFilesDriver = lucene2seq : Generate Text SequenceFiles from a Lucene index
+org.apache.mahout.text.SequenceFilesFromLuceneStorageDriver = lucene2seq : Generate Text SequenceFiles from a Lucene index
 
 #Math
 org.apache.mahout.math.hadoop.TransposeJob = transpose : Take the transpose of a matrix