You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/06/24 16:14:50 UTC
svn commit: r788033 -
/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
Author: gsingers
Date: Wed Jun 24 14:14:50 2009
New Revision: 788033
URL: http://svn.apache.org/viewvc?rev=788033&view=rev
Log:
dump a sequence file containing Vectors to either the console or a file using Vector.asFormatString
Added:
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
Added: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=788033&view=auto
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (added)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Wed Jun 24 14:14:50 2009
@@ -0,0 +1,99 @@
+package org.apache.mahout.utils.vectors;
+
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.cli2.util.HelpFormatter;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.jobcontrol.Job;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.mahout.matrix.Vector;
+
+import java.io.IOException;
+import java.io.BufferedWriter;
+import java.io.Writer;
+import java.io.FileWriter;
+import java.io.OutputStreamWriter;
+
+
+/**
+ *
+ *
+ **/
+public class VectorDumper {
+ private transient static Log log = LogFactory.getLog(VectorDumper.class);
+ private static final String LINE_SEP = System.getProperty("line.separator");
+
+ public static void main(String[] args) throws IOException {
+ DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+ ArgumentBuilder abuilder = new ArgumentBuilder();
+ GroupBuilder gbuilder = new GroupBuilder();
+
+ Option seqOpt = obuilder.withLongName("seqFile").withRequired(false).withArgument(
+ abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).
+ withDescription("The Sequence File containing the Vectors").withShortName("s").create();
+ Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
+ abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+ withDescription("The output file. If not specified, dumps to the console").withShortName("o").create();
+ Option helpOpt = obuilder.withLongName("help").
+ withDescription("Print out help").withShortName("h").create();
+
+ Group group = gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).create();
+
+ try {
+ Parser parser = new Parser();
+ parser.setGroup(group);
+ CommandLine cmdLine = parser.parse(args);
+
+ if (cmdLine.hasOption(helpOpt)) {
+
+ printHelp(group);
+ return;
+ }
+
+ if (cmdLine.hasOption(seqOpt)) {
+ Path path = new Path(cmdLine.getValue(seqOpt).toString());
+ JobClient client = new JobClient();
+ JobConf conf = new JobConf(Job.class);
+ client.setConf(conf);
+ FileSystem fs = FileSystem.get(path.toUri(), conf);
+ SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
+ VectorIterable iter = new SequenceFileVectorIterable(reader);
+ Writer writer = null;
+ if (cmdLine.hasOption(outputOpt)){
+ writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
+ } else {
+ writer = new OutputStreamWriter(System.out);
+ }
+ for (Vector vector : iter) {
+ writer.write(vector.asFormatString());
+ writer.write(LINE_SEP);
+ }
+ if (cmdLine.hasOption(outputOpt)){
+ writer.close();
+ }
+ }
+
+ } catch (OptionException e) {
+ log.error("Exception", e);
+ printHelp(group);
+ }
+
+ }
+
+ private static void printHelp(Group group) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.setGroup(group);
+ formatter.print();
+ }
+}