You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2013/06/07 21:33:29 UTC
svn commit: r1490793 - in /mahout/trunk: CHANGELOG
integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
src/conf/driver.classes.default.props
Author: gsingers
Date: Fri Jun 7 19:33:29 2013
New Revision: 1490793
URL: http://svn.apache.org/r1490793
Log:
MAHOUT-958: fix use with globs; MAHOUT-944: minor tweak to driver.classes.default.props
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
mahout/trunk/src/conf/driver.classes.default.props
Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1490793&r1=1490792&r2=1490793&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Fri Jun 7 19:33:29 2013
@@ -87,4 +87,6 @@ __MAHOUT-1181: Adding StreamingKMeans Ma
MAHOUT-961: Fix issue in decision forest tree visualizer to properly show stems of tree (Ikumasa Mukai via gsingers)
- MAHOUT-944: Create SequenceFiles out of Lucene document storage (no term vectors required) (Frank Scholten, gsingers)
\ No newline at end of file
+ MAHOUT-944: Create SequenceFiles out of Lucene document storage (no term vectors required) (Frank Scholten, gsingers)
+
+ MAHOUT-958: Fix issue with globs in RepresentativePointsDriver (Adam Baron, Vikram Dixit K, ehgjr via gsingers)
\ No newline at end of file
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java?rev=1490793&r1=1490792&r2=1490793&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java Fri Jun 7 19:33:29 2013
@@ -136,20 +136,23 @@ public final class RepresentativePointsD
private static void writeInitialState(Path output, Path clustersIn) throws IOException {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(output.toUri(), conf);
- for (FileStatus part : fs.listStatus(clustersIn, PathFilters.logsCRCFilter())) {
- Path inPart = part.getPath();
- Path path = new Path(output, inPart.getName());
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
- try {
- for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(inPart, true, conf)) {
- Cluster cluster = clusterWritable.getValue();
- if (log.isDebugEnabled()) {
- log.debug("C-{}: {}", cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null));
+ for (FileStatus dir : fs.globStatus(clustersIn)) {
+ Path inPath = dir.getPath();
+ for (FileStatus part : fs.listStatus(inPath, PathFilters.logsCRCFilter())) {
+ Path inPart = part.getPath();
+ Path path = new Path(output, inPart.getName());
+ SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
+ try {
+ for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(inPart, true, conf)) {
+ Cluster cluster = clusterWritable.getValue();
+ if (log.isDebugEnabled()) {
+ log.debug("C-{}: {}", cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null));
+ }
+ writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
}
- writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
+ } finally {
+ Closeables.close(writer, false);
}
- } finally {
- Closeables.closeQuietly(writer);
}
}
}
Modified: mahout/trunk/src/conf/driver.classes.default.props
URL: http://svn.apache.org/viewvc/mahout/trunk/src/conf/driver.classes.default.props?rev=1490793&r1=1490792&r2=1490793&view=diff
==============================================================================
--- mahout/trunk/src/conf/driver.classes.default.props (original)
+++ mahout/trunk/src/conf/driver.classes.default.props Fri Jun 7 19:33:29 2013
@@ -13,7 +13,7 @@ org.apache.mahout.vectorizer.SparseVecto
org.apache.mahout.vectorizer.EncodedVectorsFromSequenceFiles = seq2encoded: Encoded Sparse Vector generation from Text sequence files
org.apache.mahout.text.WikipediaToSequenceFile = seqwiki : Wikipedia xml dump to sequence file
org.apache.mahout.text.SequenceFilesFromMailArchives = seqmailarchives : Creates SequenceFile from a directory containing gzipped mail archives
-org.apache.mahout.text.LuceneIndexToSequenceFilesDriver = lucene2seq : Generate Text SequenceFiles from a Lucene index
+org.apache.mahout.text.SequenceFilesFromLuceneStorageDriver = lucene2seq : Generate Text SequenceFiles from a Lucene index
#Math
org.apache.mahout.math.hadoop.TransposeJob = transpose : Take the transpose of a matrix