You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by dk...@apache.org on 2018/11/29 20:45:26 UTC
[avro] branch master updated: AVRO-1858 add tojson head mode (#100)
This is an automated email from the ASF dual-hosted git repository.
dkulp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new ee5008d AVRO-1858 add tojson head mode (#100)
ee5008d is described below
commit ee5008d9ed66f3237cf1304484b1d1e6836c8a33
Author: MikeHurleySurescripts <mi...@surescripts.com>
AuthorDate: Thu Nov 29 14:39:34 2018 -0600
AVRO-1858 add tojson head mode (#100)
* AVRO-1858: added --head option to the tojson operation
* AVRO-1858: added unit tests for tojson --head option
* AVRO-1858: head input and record counters are now longs
* AVRO-1858: added tojson --head tests for zero and negative values. Negative head count is now an error.
---
.../org/apache/avro/tool/DataFileReadTool.java | 35 +++++++++++++++++--
.../org/apache/avro/tool/TestDataFileTools.java | 40 +++++++++++++++++++++-
2 files changed, 71 insertions(+), 4 deletions(-)
diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
index 145099d..ad63f21 100644
--- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
+++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
@@ -20,12 +20,14 @@ package org.apache.avro.tool;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.io.PrintStream;
+import java.util.ArrayList;
import java.util.List;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
+import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.io.DatumWriter;
@@ -36,6 +38,7 @@ import org.apache.avro.io.JsonEncoder;
/** Reads a data file and dumps to JSON */
public class DataFileReadTool implements Tool {
+ private static final long DEFAULT_HEAD_COUNT = 10;
@Override
public String getName() {
@@ -53,10 +56,14 @@ public class DataFileReadTool implements Tool {
OptionParser optionParser = new OptionParser();
OptionSpec<Void> prettyOption = optionParser
.accepts("pretty", "Turns on pretty printing.");
+ String headDesc = String.format("Converts the first X records (default is %d).", DEFAULT_HEAD_COUNT);
+ OptionSpec<String> headOption = optionParser.accepts("head", headDesc).withOptionalArg();
OptionSet optionSet = optionParser.parse(args.toArray(new String[0]));
Boolean pretty = optionSet.has(prettyOption);
- List<String> nargs = (List<String>)optionSet.nonOptionArguments();
+ List<String> nargs = new ArrayList<String>((List<String>)optionSet.nonOptionArguments());
+
+ long headCount = getHeadCount(optionSet, headOption, nargs);
if (nargs.size() != 1) {
printHelp(err);
@@ -73,8 +80,10 @@ public class DataFileReadTool implements Tool {
Schema schema = streamReader.getSchema();
DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, pretty);
- for (Object datum : streamReader)
+ for(long recordCount = 0; streamReader.hasNext() && recordCount < headCount; recordCount++) {
+ Object datum = streamReader.next();
writer.write(datum, encoder);
+ }
encoder.flush();
out.println();
out.flush();
@@ -84,8 +93,28 @@ public class DataFileReadTool implements Tool {
return 0;
}
+ private static long getHeadCount(OptionSet optionSet, OptionSpec<String> headOption, List<String> nargs) {
+ long headCount = Long.MAX_VALUE;
+ if(optionSet.has(headOption)) {
+ headCount = DEFAULT_HEAD_COUNT;
+ List<String> headValues = optionSet.valuesOf(headOption);
+ if(headValues.size() > 0) {
+ // if the value parses as a long, assume it's meant to go with --head;
+ // otherwise assume it was an optionSet.nonOptionArgument and add it back to the list
+ // TODO: support input filenames whose whole path+name is parsable as a long?
+ try {
+ headCount = Long.parseLong(headValues.get(0));
+ if(headCount < 0) throw new AvroRuntimeException("--head count must not be negative");
+ } catch(NumberFormatException ex) {
+ nargs.addAll(headValues);
+ }
+ }
+ }
+ return headCount;
+ }
+
private void printHelp(PrintStream ps) {
- ps.println("tojson --pretty input-file");
+ ps.println("tojson [--pretty] [--head[=X]] input-file");
ps.println();
ps.println(getShortDescription());
ps.println("A dash ('-') can be given as an input file to use stdin");
diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
index 341b6af..a9ebab6 100644
--- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
+++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
@@ -35,6 +35,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;
import org.apache.avro.file.DataFileReader;
@@ -48,7 +49,7 @@ import org.junit.rules.TemporaryFolder;
@SuppressWarnings("deprecation")
public class TestDataFileTools {
- static final int COUNT = 10;
+ static final int COUNT = 15;
static File sampleFile;
static String jsonData;
static Schema schema;
@@ -118,6 +119,43 @@ public class TestDataFileTools {
}
@Test
+ public void testReadHeadDefaultCount() throws Exception {
+ String expectedJson = jsonData.substring(0, 20); // first 10 numbers
+ assertEquals(expectedJson,
+ run(new DataFileReadTool(), "--head", sampleFile.getPath()));
+ }
+
+ @Test
+ public void testReadHeadEquals3Count() throws Exception {
+ String expectedJson = jsonData.substring(0, 6); // first 3 numbers
+ assertEquals(expectedJson,
+ run(new DataFileReadTool(), "--head=3", sampleFile.getPath()));
+ }
+
+ @Test
+ public void testReadHeadSpace5Count() throws Exception {
+ String expectedJson = jsonData.substring(0, 10); // first 5 numbers
+ assertEquals(expectedJson,
+ run(new DataFileReadTool(), "--head", "5", sampleFile.getPath()));
+ }
+
+ @Test
+ public void testReadHeadLongCount() throws Exception {
+ assertEquals(jsonData,
+ run(new DataFileReadTool(), "--head=3000000000", sampleFile.getPath()));
+ }
+
+ @Test
+ public void testReadHeadEqualsZeroCount() throws Exception {
+ assertEquals("\n", run(new DataFileReadTool(), "--head=0", sampleFile.getPath()));
+ }
+
+ @Test(expected = AvroRuntimeException.class)
+ public void testReadHeadNegativeCount() throws Exception {
+ assertEquals("\n", run(new DataFileReadTool(), "--head=-5", sampleFile.getPath()));
+ }
+
+ @Test
public void testGetMeta() throws Exception {
String output = run(new DataFileGetMetaTool(), sampleFile.getPath());
assertTrue(output, output.contains("avro.schema\t"+schema.toString()+"\n"));