You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/06/27 13:26:11 UTC
svn commit: r788958 -
/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
Author: gsingers
Date: Sat Jun 27 11:26:10 2009
New Revision: 788958
URL: http://svn.apache.org/viewvc?rev=788958&view=rev
Log:
Add SequenceFileDumper, a command-line utility that dumps the key/value pairs of any sequence file
Added:
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
Added: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=788958&view=auto
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (added)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Sat Jun 27 11:26:10 2009
@@ -0,0 +1,156 @@
+package org.apache.mahout.utils;
+
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.cli2.util.HelpFormatter;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.jobcontrol.Job;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.clustering.ClusterBase;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.io.FileWriter;
+import java.io.OutputStreamWriter;
+
+
+/**
+ * Command-line utility that prints every key/value pair of a Hadoop
+ * {@link SequenceFile} as text, either to the console or to a file.
+ *
+ * <p>Supported options:</p>
+ * <ul>
+ *   <li>{@code --seqFile} ({@code -s}): the sequence file to read</li>
+ *   <li>{@code --output} ({@code -o}): the file to write to; the console if omitted</li>
+ *   <li>{@code --substring} ({@code -b}): maximum number of chars of each value to print</li>
+ *   <li>{@code --help} ({@code -h}): print the usage message</li>
+ * </ul>
+ **/
+public class SequenceFileDumper {
+  private static final Log log = LogFactory.getLog(SequenceFileDumper.class);
+  private static final String LINE_SEP = System.getProperty("line.separator");
+
+  /**
+   * Parses the command line and dumps the requested sequence file.
+   *
+   * @param args command-line arguments; see the class comment for the options
+   * @throws IOException if the sequence file cannot be read or the output cannot be written
+   * @throws IllegalAccessException if the key or value class cannot be accessed for instantiation
+   * @throws InstantiationException if the key or value class cannot be instantiated
+   */
+  public static void main(String[] args) throws IOException, IllegalAccessException, InstantiationException {
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+
+    Option seqOpt = obuilder.withLongName("seqFile").withRequired(false).withArgument(
+        abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Sequence File to dump").withShortName("s").create();
+    Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
+        abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+        withDescription("The output file. If not specified, dumps to the console").withShortName("o").create();
+    Option substringOpt = obuilder.withLongName("substring").withRequired(false).withArgument(
+        abuilder.withName("substring").withMinimum(1).withMaximum(1).create()).
+        withDescription("The maximum number of chars of each value to print").withShortName("b").create();
+    Option helpOpt = obuilder.withLongName("help").
+        withDescription("Print out help").withShortName("h").create();
+
+    // The help option has to be part of the group, otherwise the parser rejects it as unknown.
+    Group group = gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).
+        withOption(substringOpt).withOption(helpOpt).create();
+
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+
+      // Without an input file there is nothing to dump, so show the usage instead of exiting silently.
+      if (cmdLine.hasOption(helpOpt) || !cmdLine.hasOption(seqOpt)) {
+        printHelp(group);
+        return;
+      }
+
+      // Validate the limit before any file is opened or created.
+      int sub = Integer.MAX_VALUE;
+      if (cmdLine.hasOption(substringOpt)) {
+        sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
+        if (sub < 0) {
+          throw new IllegalArgumentException("substring must not be negative: " + sub);
+        }
+      }
+
+      Path path = new Path(cmdLine.getValue(seqOpt).toString());
+      System.out.println("Input Path: " + path);
+      JobConf conf = new JobConf(Job.class);
+      FileSystem fs = FileSystem.get(path.toUri(), conf);
+      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
+      try {
+        boolean toFile = cmdLine.hasOption(outputOpt);
+        Writer writer;
+        if (toFile) {
+          writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
+        } else {
+          writer = new OutputStreamWriter(System.out);
+        }
+        try {
+          dump(reader, writer, sub);
+        } finally {
+          if (toFile) {
+            writer.close();
+          } else {
+            // System.out must stay open; only push out what is still buffered.
+            writer.flush();
+          }
+        }
+      } finally {
+        reader.close();
+      }
+    } catch (OptionException e) {
+      log.error("Exception", e);
+      printHelp(group);
+    }
+  }
+
+  /**
+   * Writes every record of the reader as one line of the form
+   * {@code Key: <key>: Value: <value>}.
+   *
+   * @param reader the open sequence file to read from
+   * @param writer the destination of the text output
+   * @param maxValueChars the maximum number of chars of each value's string form to write
+   */
+  private static void dump(SequenceFile.Reader reader, Writer writer, int maxValueChars)
+      throws IOException, IllegalAccessException, InstantiationException {
+    Writable key = (Writable) reader.getKeyClass().newInstance();
+    Writable value = (Writable) reader.getValueClass().newInstance();
+    while (reader.next(key, value)) {
+      writer.write("Key: " + key);
+      writer.write(": Value: " + truncate(String.valueOf(value), maxValueChars));
+      writer.write(LINE_SEP);
+    }
+  }
+
+  /** Returns at most the first {@code maxChars} chars of the text. */
+  private static String truncate(String text, int maxChars) {
+    return text.length() > maxChars ? text.substring(0, maxChars) : text;
+  }
+
+  /** Prints the usage message for the given option group. */
+  private static void printHelp(Group group) {
+    HelpFormatter formatter = new HelpFormatter();
+    formatter.setGroup(group);
+    formatter.print();
+  }
+}
\ No newline at end of file