You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2011/07/15 13:30:14 UTC

svn commit: r1147100 - /mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java

Author: gsingers
Date: Fri Jul 15 11:30:14 2011
New Revision: 1147100

URL: http://svn.apache.org/viewvc?rev=1147100&view=rev
Log:
Add the ability to restrict the number of key-value (KV) pairs to dump.

Modified:
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1147100&r1=1147099&r2=1147100&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Fri Jul 15 11:30:14 2011
@@ -17,10 +17,6 @@
 
 package org.apache.mahout.utils;
 
-import java.io.File;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-
 import com.google.common.base.Charsets;
 import com.google.common.io.Closeables;
 import com.google.common.io.Files;
@@ -41,45 +37,52 @@ import org.apache.mahout.common.iterator
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.File;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+
 public final class SequenceFileDumper {
-  
+
   private static final Logger log = LoggerFactory.getLogger(SequenceFileDumper.class);
-  
+
   private SequenceFileDumper() {
   }
-  
+
   public static void main(String[] args) throws Exception {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-    
+
     Option seqOpt = obuilder.withLongName("seqFile").withRequired(false).withArgument(
-      abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).
-      withDescription("The Sequence File containing the Clusters").withShortName("s").create();
+            abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).
+            withDescription("The Sequence File containing the Clusters").withShortName("s").create();
     Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument(
-      abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
-      withDescription("The output file.  If not specified, dumps to the console").withShortName("o").create();
+            abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+            withDescription("The output file.  If not specified, dumps to the console").withShortName("o").create();
     Option substringOpt = obuilder.withLongName("substring").withRequired(false).withArgument(
-      abuilder.withName("substring").withMinimum(1).withMaximum(1).create()).
-      withDescription("The number of chars of the asFormatString() to print").withShortName("b").create();
+            abuilder.withName("substring").withMinimum(1).withMaximum(1).create()).
+            withDescription("The number of chars of the asFormatString() to print").withShortName("b").create();
     Option countOpt = obuilder.withLongName("count").withRequired(false).
-    withDescription("Report the count only").withShortName("c").create();
+            withDescription("Report the count only").withShortName("c").create();
+    Option numItemsOpt = obuilder.withLongName("n").withRequired(false).withArgument(
+            abuilder.withName("numItems").withMinimum(1).withMaximum(1).create()).
+            withDescription("Output at most <n> key value pairs").withShortName("n").create();
     Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();
-    
+
     Group group = gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt)
-      .withOption(substringOpt).withOption(countOpt).withOption(helpOpt).create();
-    
+            .withOption(substringOpt).withOption(countOpt).withOption(numItemsOpt).withOption(helpOpt).create();
+
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
       CommandLine cmdLine = parser.parse(args);
-      
+
       if (cmdLine.hasOption(helpOpt)) {
-        
+
         printHelp(group);
         return;
       }
-      
+
       if (cmdLine.hasOption(seqOpt)) {
         Path path = new Path(cmdLine.getValue(seqOpt).toString());
         Configuration conf = new Configuration();
@@ -98,7 +101,7 @@ public final class SequenceFileDumper {
             sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
           }
           boolean countOnly = cmdLine.hasOption(countOpt);
-          SequenceFileIterator<?,?> iterator = new SequenceFileIterator<Writable,Writable>(path, true, conf);
+          SequenceFileIterator<?, ?> iterator = new SequenceFileIterator<Writable, Writable>(path, true, conf);
           writer.append("Key class: ").append(iterator.getKeyClass().toString());
           writer.append(" Value Class: ").append(iterator.getValueClass().toString()).append('\n');
           long count = 0;
@@ -109,8 +112,13 @@ public final class SequenceFileDumper {
             }
             writer.append("Count: ").append(String.valueOf(count)).append('\n');
           } else {
-            while (iterator.hasNext()) {
-              Pair<?,?> record = iterator.next();
+            long numItems = Long.MAX_VALUE;
+            if (cmdLine.hasOption(numItemsOpt)) {
+              numItems = Long.parseLong(cmdLine.getValue(numItemsOpt).toString());
+              writer.append("Max Items to dump: ").append(String.valueOf(numItems));
+            }
+            while (iterator.hasNext() && count < numItems) {
+              Pair<?, ?> record = iterator.next();
               writer.append("Key: ").append(record.getFirst().toString());
               String str = record.getSecond().toString();
               writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str);
@@ -123,14 +131,14 @@ public final class SequenceFileDumper {
           Closeables.closeQuietly(writer);
         }
       }
-      
+
     } catch (OptionException e) {
       log.error("Exception", e);
       printHelp(group);
     }
-    
+
   }
-  
+
   private static void printHelp(Group group) {
     HelpFormatter formatter = new HelpFormatter();
     formatter.setGroup(group);