You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by dk...@apache.org on 2018/11/29 20:45:26 UTC

[avro] branch master updated: AVRO-1858 add tojson head mode (#100)

This is an automated email from the ASF dual-hosted git repository.

dkulp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new ee5008d  AVRO-1858 add tojson head mode (#100)
ee5008d is described below

commit ee5008d9ed66f3237cf1304484b1d1e6836c8a33
Author: MikeHurleySurescripts <mi...@surescripts.com>
AuthorDate: Thu Nov 29 14:39:34 2018 -0600

    AVRO-1858 add tojson head mode (#100)
    
    * AVRO-1858: added --head option to the tojson operation
    
    * AVRO-1858: added unit tests for tojson --head option
    
    * AVRO-1858: head input and record counters are now longs
    
    * AVRO-1858: added tojson --head tests for zero and negative values. Negative head count is now an error.
---
 .../org/apache/avro/tool/DataFileReadTool.java     | 35 +++++++++++++++++--
 .../org/apache/avro/tool/TestDataFileTools.java    | 40 +++++++++++++++++++++-
 2 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
index 145099d..ad63f21 100644
--- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
+++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
@@ -20,12 +20,14 @@ package org.apache.avro.tool;
 import java.io.BufferedInputStream;
 import java.io.InputStream;
 import java.io.PrintStream;
+import java.util.ArrayList;
 import java.util.List;
 
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
 
+import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.Schema;
 import org.apache.avro.file.DataFileStream;
 import org.apache.avro.io.DatumWriter;
@@ -36,6 +38,7 @@ import org.apache.avro.io.JsonEncoder;
 
 /** Reads a data file and dumps to JSON */
 public class DataFileReadTool implements Tool {
+  private static final long DEFAULT_HEAD_COUNT = 10;
 
   @Override
   public String getName() {
@@ -53,10 +56,14 @@ public class DataFileReadTool implements Tool {
     OptionParser optionParser = new OptionParser();
     OptionSpec<Void> prettyOption = optionParser
         .accepts("pretty", "Turns on pretty printing.");
+    String headDesc = String.format("Converts the first X records (default is %d).", DEFAULT_HEAD_COUNT);
+    OptionSpec<String> headOption = optionParser.accepts("head", headDesc).withOptionalArg();
 
     OptionSet optionSet = optionParser.parse(args.toArray(new String[0]));
     Boolean pretty = optionSet.has(prettyOption);
-    List<String> nargs = (List<String>)optionSet.nonOptionArguments();
+    List<String> nargs = new ArrayList<String>((List<String>)optionSet.nonOptionArguments());
+
+    long headCount = getHeadCount(optionSet, headOption, nargs);
 
     if (nargs.size() != 1) {
       printHelp(err);
@@ -73,8 +80,10 @@ public class DataFileReadTool implements Tool {
       Schema schema = streamReader.getSchema();
       DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
       JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, pretty);
-      for (Object datum : streamReader)
+      for(long recordCount = 0; streamReader.hasNext() && recordCount < headCount; recordCount++) {
+        Object datum = streamReader.next();
         writer.write(datum, encoder);
+      }
       encoder.flush();
       out.println();
       out.flush();
@@ -84,8 +93,28 @@ public class DataFileReadTool implements Tool {
     return 0;
   }
 
+  /** Resolves the --head limit; a non-numeric --head value is handed back to nargs as a positional argument. */
+  private static long getHeadCount(OptionSet optionSet, OptionSpec<String> headOption, List<String> nargs) {
+    if (!optionSet.has(headOption)) {
+      return Long.MAX_VALUE; // no --head given: effectively unlimited
+    }
+    List<String> supplied = optionSet.valuesOf(headOption);
+    if (supplied.isEmpty()) {
+      return DEFAULT_HEAD_COUNT; // bare --head with no value
+    }
+    try {
+      long requested = Long.parseLong(supplied.get(0));
+      if (requested < 0) throw new AvroRuntimeException("--head count must not be negative");
+      return requested;
+    } catch (NumberFormatException notANumber) {
+      // TODO: support input filenames whose whole path+name is long parsable?
+      nargs.addAll(supplied);
+      return DEFAULT_HEAD_COUNT;
+    }
+  }
+
   private void printHelp(PrintStream ps) {
-    ps.println("tojson --pretty input-file");
+    ps.println("tojson [--pretty] [--head[=X]] input-file");
     ps.println();
     ps.println(getShortDescription());
     ps.println("A dash ('-') can be given as an input file to use stdin");
diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
index 341b6af..a9ebab6 100644
--- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
+++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
@@ -35,6 +35,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
+import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Type;
 import org.apache.avro.file.DataFileReader;
@@ -48,7 +49,7 @@ import org.junit.rules.TemporaryFolder;
 
 @SuppressWarnings("deprecation")
 public class TestDataFileTools {
-  static final int COUNT = 10;
+  static final int COUNT = 15;
   static File sampleFile;
   static String jsonData;
   static Schema schema;
@@ -118,6 +119,43 @@ public class TestDataFileTools {
   }
 
   @Test
+  public void testReadHeadDefaultCount() throws Exception {
+    String firstTen = jsonData.substring(0, 20); // first 10 numbers, the default for a bare --head
+    String actual = run(new DataFileReadTool(), "--head", sampleFile.getPath());
+    assertEquals(firstTen, actual);
+  }
+
+  @Test
+  public void testReadHeadEquals3Count() throws Exception {
+    String firstThree = jsonData.substring(0, 6); // first 3 numbers
+    String actual = run(new DataFileReadTool(), "--head=3", sampleFile.getPath());
+    assertEquals(firstThree, actual);
+  }
+
+  @Test
+  public void testReadHeadSpace5Count() throws Exception {
+    String firstFive = jsonData.substring(0, 10); // first 5 numbers
+    String actual = run(new DataFileReadTool(), "--head", "5", sampleFile.getPath());
+    assertEquals(firstFive, actual);
+  }
+
+  @Test
+  public void testReadHeadLongCount() throws Exception {
+    String actual = run(new DataFileReadTool(), "--head=3000000000", sampleFile.getPath()); // count exceeds int range
+    assertEquals(jsonData, actual);
+  }
+
+  @Test
+  public void testReadHeadEqualsZeroCount() throws Exception {
+    assertEquals("\n", run(new DataFileReadTool(), "--head=0", sampleFile.getPath())); // zero records: only the trailing newline
+  }
+
+  @Test(expected = AvroRuntimeException.class)
+  public void testReadHeadNegativeCount() throws Exception {
+    run(new DataFileReadTool(), "--head=-5", sampleFile.getPath()); // negative count is rejected; no output to compare
+  }
+
+  @Test
   public void testGetMeta() throws Exception {
     String output = run(new DataFileGetMetaTool(), sampleFile.getPath());
     assertTrue(output, output.contains("avro.schema\t"+schema.toString()+"\n"));