You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/06/24 16:14:50 UTC

svn commit: r788033 - /lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java

Author: gsingers
Date: Wed Jun 24 14:14:50 2009
New Revision: 788033

URL: http://svn.apache.org/viewvc?rev=788033&view=rev
Log:
dump a sequence file containing Vectors to either the console or a file using Vector.asFormatString

Added:
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java

Added: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=788033&view=auto
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (added)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Wed Jun 24 14:14:50 2009
@@ -0,0 +1,99 @@
+package org.apache.mahout.utils.vectors;
+
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.cli2.util.HelpFormatter;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.jobcontrol.Job;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.mahout.matrix.Vector;
+
+import java.io.IOException;
+import java.io.BufferedWriter;
+import java.io.Writer;
+import java.io.FileWriter;
+import java.io.OutputStreamWriter;
+
+
+/**
+ * Command-line tool that dumps the Vectors stored in a Hadoop SequenceFile, each followed by a
+ * line separator, in Vector.asFormatString() form, to the --output file or else to the console.
+ **/
+public class VectorDumper {
+  private static final Log log = LogFactory.getLog(VectorDumper.class);
+  private static final String LINE_SEP = System.getProperty("line.separator");
+
+  /**
+   * Parses the options and dumps the sequence file; prints usage on --help or bad arguments.
+   * @param args command-line arguments: --seqFile (-s), --output (-o), --help (-h)
+   * @throws IOException if reading the sequence file or writing the output fails
+   */
+  public static void main(String[] args) throws IOException {
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+    Option seqOpt = obuilder.withLongName("seqFile").withRequired(false).withArgument(
+            abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).
+            withDescription("The Sequence File containing the Vectors").withShortName("s").create();
+    Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
+            abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+            withDescription("The output file.  If not specified, dumps to the console").withShortName("o").create();
+    Option helpOpt = obuilder.withLongName("help").
+            withDescription("Print out help").withShortName("h").create();
+    // helpOpt has to be in the group, otherwise --help is not a recognised option for the parser.
+    Group group = gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).withOption(helpOpt).create();
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+      if (cmdLine.hasOption(helpOpt) || !cmdLine.hasOption(seqOpt)) { // no input given: show usage
+        printHelp(group);
+        return;
+      }
+
+      Path path = new Path(cmdLine.getValue(seqOpt).toString());
+      JobClient client = new JobClient(); // NOTE(review): not used by the dump itself; confirm it can go
+      JobConf conf = new JobConf(Job.class);
+      client.setConf(conf);
+      FileSystem fs = FileSystem.get(path.toUri(), conf);
+      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
+      Writer writer = null;
+      try {
+        VectorIterable iter = new SequenceFileVectorIterable(reader);
+        writer = cmdLine.hasOption(outputOpt) ? new FileWriter(cmdLine.getValue(outputOpt).toString())
+                                              : new OutputStreamWriter(System.out);
+        for (Vector vector : iter) {
+          writer.write(vector.asFormatString());
+          writer.write(LINE_SEP);
+        }
+        writer.flush(); // OutputStreamWriter buffers; without this, console output can be lost
+      } finally {
+        if (writer != null && cmdLine.hasOption(outputOpt)) { // close the file, never System.out
+          writer.close();
+        }
+        reader.close();
+      }
+    } catch (OptionException e) {
+      log.error("Exception", e);
+      printHelp(group);
+    }
+  }
+
+  /** Prints the usage message for the given option group. */
+  private static void printHelp(Group group) {
+    HelpFormatter formatter = new HelpFormatter();
+    formatter.setGroup(group);
+    formatter.print();
+  }
+}