You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2013/04/27 00:12:16 UTC
svn commit: r1476418 - in /avro/trunk: ./
lang/java/tools/src/main/java/org/apache/avro/tool/
lang/java/tools/src/test/java/org/apache/avro/tool/
Author: cutting
Date: Fri Apr 26 22:12:16 2013
New Revision: 1476418
URL: http://svn.apache.org/r1476418
Log:
AVRO-1307. Java: Add 'cat' tool to append and sample data files. Contributed by Vincenz Priesnitz.
Added:
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java (with props)
avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java (with props)
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java
Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1476418&r1=1476417&r2=1476418&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Fri Apr 26 22:12:16 2013
@@ -4,6 +4,9 @@ Trunk (not yet released)
NEW FEATURES
+ AVRO-1307. Java: Add 'cat' tool to append and sample data files.
+ (Vincenz Priesnitz via cutting)
+
IMPROVEMENTS
AVRO-1260. Ruby: Improve read performance. (Martin Kleppmann via cutting)
Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java?rev=1476418&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java Fri Apr 26 22:12:16 2013
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileConstants;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+
+/** Tool to concatenate Avro data files, optionally skipping, limiting and sampling their records. */
+public class CatTool implements Tool {
+
+ private long totalCopied;
+ private double sampleCounter;
+
+ private GenericRecord reuse;
+ private DataFileStream<GenericRecord> reader;
+ private DataFileWriter<GenericRecord> writer;
+ private Schema schema;
+ private List<Path> inFiles;
+ private int currentInput;
+
+ /**
+  * Concatenates the records of the given input files into one output file,
+  * optionally skipping, limiting and sampling them.
+  *
+  * <p>All option values are validated before any input is opened or the
+  * output is created, so a rejected invocation leaves no partial output
+  * behind. Progress and error messages are written to {@code err}, because
+  * {@code out} carries the Avro data when the output argument is "-".
+  *
+  * @param in unused
+  * @param out receives the help text, or the data file when the last argument is "-"
+  * @param err receives diagnostics and progress messages
+  * @param args options followed by the input files/directories and, last, the output file
+  * @return 0 on success or when only the help text was printed, 1 on invalid arguments
+  * @throws Exception if reading or writing fails, or if the input schemas differ
+  */
+ @Override
+ public int run(InputStream in, PrintStream out, PrintStream err,
+     List<String> args) throws Exception {
+   OptionParser optParser = new OptionParser();
+   OptionSpec<Long> offsetOpt = optParser
+       .accepts("offset", "offset for reading input")
+       .withRequiredArg()
+       .ofType(Long.class)
+       .defaultsTo(Long.valueOf(0L));
+   OptionSpec<Long> limitOpt = optParser
+       .accepts("limit", "maximum number of records in the outputfile")
+       .withRequiredArg()
+       .ofType(Long.class)
+       .defaultsTo(Long.valueOf(Long.MAX_VALUE));
+   OptionSpec<Double> fracOpt = optParser
+       .accepts("samplerate", "rate at which records will be collected")
+       .withRequiredArg()
+       .ofType(Double.class)
+       .defaultsTo(Double.valueOf(1));
+
+   OptionSet opts = optParser.parse(args.toArray(new String[0]));
+   List<String> nargs = opts.nonOptionArguments();
+   if (nargs.size() < 2) {
+     printHelp(out);
+     return 0;
+   }
+
+   // Validate first: nothing has been opened or created yet, so bailing
+   // out here cannot leak a stream or leave a header-only output file.
+   long offset = opts.valueOf(offsetOpt);
+   long limit = opts.valueOf(limitOpt);
+   double samplerate = opts.valueOf(fracOpt);
+   if (limit < 0) {
+     err.println("limit has to be non-negative");
+     printHelp(err);
+     return 1;
+   }
+   if (offset < 0) {
+     err.println("offset has to be non-negative");
+     printHelp(err);
+     return 1;
+   }
+   // Written as a negated range test so that NaN is rejected as well.
+   if (!(samplerate >= 0 && samplerate <= 1)) {
+     err.println("samplerate has to be a number between 0 and 1");
+     printHelp(err);
+     return 1;
+   }
+
+   // Reset the per-run state so that an instance can be used more than once.
+   totalCopied = 0;
+   sampleCounter = 1;
+   reuse = null;
+   reader = null;
+   schema = null;
+   currentInput = -1;
+
+   inFiles = Util.getFiles(nargs.subList(0, nargs.size() - 1));
+   if (inFiles.isEmpty()) {
+     err.println("no input files found");
+     return 1;
+   }
+
+   err.println("List of input files:");
+   for (Path p : inFiles) {
+     err.println(p);
+   }
+
+   try {
+     nextInput();
+
+     OutputStream output = out;
+     String lastArg = nargs.get(nargs.size() - 1);
+     if (!lastArg.equals("-")) {
+       output = Util.createFromFS(lastArg);
+     }
+     writer = new DataFileWriter<GenericRecord>(
+         new GenericDatumWriter<GenericRecord>());
+
+     // The output inherits codec and user metadata from the first input.
+     String codecName = reader.getMetaString(DataFileConstants.CODEC);
+     CodecFactory codec = (codecName == null)
+         ? CodecFactory.fromString(DataFileConstants.NULL_CODEC)
+         : CodecFactory.fromString(codecName);
+     writer.setCodec(codec);
+     for (String key : reader.getMetaKeys()) {
+       if (!DataFileWriter.isReservedMeta(key)) {
+         writer.setMeta(key, reader.getMeta(key));
+       }
+     }
+     writer.create(schema, output);
+
+     try {
+       skip(offset);
+       writeRecords(limit, samplerate);
+       writer.flush();
+     } finally {
+       writer.close();
+     }
+   } finally {
+     // Whichever input is current when we stop is still open.
+     if (reader != null) {
+       reader.close();
+     }
+   }
+
+   err.println(totalCopied + " records written.");
+   Util.close(out);
+   return 0;
+ }
+
+ /**
+  * Advances to the next input file and opens a reader on it. The reader of
+  * the previous file, if any, is closed first so that its stream is not
+  * leaked. The first file opened determines the schema; every later file
+  * must have an equal schema.
+  *
+  * @throws IOException if the file cannot be opened or its schema differs
+  *         from the schema of the first file
+  */
+ private void nextInput() throws IOException {
+   if (reader != null) {
+     reader.close();
+     reader = null;
+   }
+   currentInput++;
+   Path path = inFiles.get(currentInput);
+   FSDataInputStream input = new FSDataInputStream(Util.openFromFS(path));
+   reader = new DataFileStream<GenericRecord>(input, new GenericDatumReader<GenericRecord>());
+   if (schema == null) { // if this is the first file, the schema gets saved
+     schema = reader.getSchema();
+   } else if (!schema.equals(reader.getSchema())) { // subsequent files have to have equal schemas
+     throw new IOException("schemas dont match");
+   }
+ }
+
+ /** Tells whether at least one input file follows the current one. */
+ private boolean hasNextInput() {
+   int following = currentInput + 1;
+   return following < inFiles.size();
+ }
+
+ /**
+  * Reads and discards up to {@code skip} records, moving on to the next
+  * input file whenever the current one is exhausted.
+  *
+  * @param skip number of records to discard
+  * @return the number of records actually discarded, which is smaller than
+  *         {@code skip} when the inputs run out first
+  * @throws IOException if reading fails or a following file cannot be opened
+  */
+ private long skip(long skip) throws IOException {
+   long skipped = 0;
+   for (;;) {
+     while (0 < skip && reader.hasNext()) {
+       reader.next(reuse);
+       skipped++;
+       skip--;
+     }
+     if (skip <= 0 || !hasNextInput()) {
+       return skipped;
+     }
+     nextInput(); // current file exhausted, continue in the next one
+   }
+ }
+
+ /**
+  * Copies records from the inputs to the writer, taking a record each time
+  * the running sample counter reaches 1. Moves on to the next input file
+  * whenever the current one is exhausted before {@code count} is reached.
+  *
+  * @param count maximum number of records to write
+  * @param samplerate amount added to the sample counter for every record read
+  * @return the number of records written
+  * @throws IOException if reading or writing fails or a following file cannot be opened
+  */
+ private long writeRecords(long count, double samplerate) throws IOException {
+   long written = 0;
+   for (;;) {
+     long fromThisFile = 0;
+     while (written + fromThisFile < count && reader.hasNext()) {
+       reuse = reader.next(reuse);
+       sampleCounter += samplerate;
+       if (sampleCounter >= 1) {
+         writer.append(reuse);
+         fromThisFile++;
+         sampleCounter--;
+       }
+     }
+     // Account for this file before a following file is opened.
+     totalCopied += fromThisFile;
+     written += fromThisFile;
+     if (written >= count || !hasNextInput()) {
+       return written;
+     }
+     nextInput();
+   }
+ }
+
+ /**
+  * Prints the usage text of this tool.
+  *
+  * @param out stream the usage text is written to
+  */
+ private void printHelp(PrintStream out) {
+   String[] usage = {
+     "cat --offset <offset> --limit <limit> --samplerate <samplerate> [input-files...] output-file",
+     "",
+     "extracts records from a list of input files into a new file.",
+     "--offset start of the extract",
+     "--limit maximum number of records in the output file.",
+     "--samplerate rate at which records will be collected",
+     "A dash ('-') can be given to direct output to stdout",
+   };
+   for (String line : usage) {
+     out.println(line);
+   }
+ }
+
+ /** Returns the name of this tool, {@code "cat"}. */
+ @Override
+ public String getName() {
+ return "cat";
+ }
+
+ /** Returns a one-line description of this tool. */
+ @Override
+ public String getShortDescription() {
+ return "extracts samples from files";
+ }
+
+}
Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java?rev=1476418&r1=1476417&r2=1476418&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java (original)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java Fri Apr 26 22:12:16 2013
@@ -35,6 +35,7 @@ public class Main {
Main() {
tools = new TreeMap<String, Tool>();
for (Tool tool : new Tool[] {
+ new CatTool(),
new SpecificCompilerTool(),
new InduceSchemaTool(),
new JsonToBinaryFragmentTool(),
Modified: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java?rev=1476418&r1=1476417&r2=1476418&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java (original)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java Fri Apr 26 22:12:16 2013
@@ -23,6 +23,9 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
@@ -30,6 +33,8 @@ import org.apache.avro.generic.GenericDa
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.mapred.FsInput;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/** Static utility methods for tools. */
@@ -75,6 +80,17 @@ class Util {
}
/**
+ * Opens the given file for reading through the filesystem that owns it,
+ * or through the default filesystem if none is given.
+ * @param filename The file to be opened
+ * @return an InputStream positioned at the start of the file
+ * @throws IOException if the filesystem cannot be obtained or the file cannot be opened
+ */
+ static InputStream openFromFS(Path filename)
+     throws IOException {
+   Configuration conf = new Configuration();
+   FileSystem owner = filename.getFileSystem(conf);
+   return owner.open(filename);
+ }
+
+ /**
* Returns a seekable FsInput using the owning filesystem,
* or the default if none is given.
* @param filename The filename to be opened
@@ -128,6 +144,54 @@ class Util {
}
}
+ /**
+  * Resolves a file or directory name to the list of files it denotes.
+  * For a file the list holds just the Path built from the given name.
+  * For a directory it holds the Paths of the files directly inside it;
+  * subdirectories and anything below them are left out.
+  * The list is sorted according to the natural ordering of Path.
+  * @param fileOrDirName filename or directoryname
+  * @return the sorted list of Paths
+  * @throws IOException if the filesystem cannot be accessed
+  */
+ static List<Path> getFiles(String fileOrDirName)
+     throws IOException {
+   Path target = new Path(fileOrDirName);
+   FileSystem fs = target.getFileSystem(new Configuration());
+   List<Path> found = new ArrayList<Path>();
+
+   if (fs.isFile(target)) {
+     found.add(target);
+   } else if (fs.getFileStatus(target).isDir()) {
+     for (FileStatus entry : fs.listStatus(target)) {
+       if (entry.isDir()) {
+         continue; // subdirectories are not descended into
+       }
+       found.add(entry.getPath());
+     }
+   }
+   Collections.sort(found);
+   return found;
+ }
+
+ /**
+  * Resolves every given name with {@link #getFiles(String)} and merges the
+  * results into a single list: one Path per named file plus one Path per
+  * file directly inside a named directory.
+  * The merged list is sorted according to the natural ordering of Path, so
+  * the order of the arguments is not preserved.
+  * @param fileOrDirNames A list of filenames and directorynames
+  * @return A sorted list of Paths, one for each file
+  * @throws IOException if the filesystem cannot be accessed
+  */
+ static List<Path> getFiles(List<String> fileOrDirNames)
+     throws IOException {
+   List<Path> merged = new ArrayList<Path>();
+   for (String fileOrDirName : fileOrDirNames) {
+     List<Path> resolved = getFiles(fileOrDirName);
+     merged.addAll(resolved);
+   }
+   Collections.sort(merged);
+   return merged;
+ }
+
/**
* Converts a String JSON object into a generic datum.
*
Added: avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java?rev=1476418&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java (added)
+++ avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java Fri Apr 26 22:12:16 2013
@@ -0,0 +1,347 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.avro.AvroTestUtil;
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.junit.Test;
+
+public class TestCatTool {
+ private static final int ROWS_IN_INPUT_FILES = 100000;
+ private static final int OFFSET = 1000;
+ private static final int LIMIT_WITHIN_INPUT_BOUNDS = 100;
+ private static final int LIMIT_OUT_OF_INPUT_BOUNDS = 100001;
+ private static final double SAMPLERATE = .01;
+ private static final double SAMPLERATE_TOO_SMALL = .00000001;
+
+ private final Schema INTSCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\", " +
+ "\"name\":\"myRecord\", " +
+ "\"fields\":[ " +
+ "{\"name\":\"value\",\"type\":\"int\"} " +
+ "]}");
+ private final Schema STRINGSCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\", " +
+ "\"name\":\"myRecord\", " +
+ "\"fields\":[ {\"name\":\"value\",\"type\":\"string\"} " +
+ "]}");
+ private static final CodecFactory DEFLATE = CodecFactory.deflateCodec(9);
+ private static final CodecFactory SNAPPY = CodecFactory.snappyCodec();
+
+
+ /**
+  * Builds one test record for the given row.
+  * STRING records hold the row number modulo 100 as text, INT records hold
+  * the row number itself; any other type is rejected.
+  */
+ private GenericRecord aDatum(Type ofType, int forRow) {
+   final Schema recordSchema;
+   final Object value;
+   switch (ofType) {
+     case STRING:
+       recordSchema = STRINGSCHEMA;
+       value = String.valueOf(forRow % 100);
+       break;
+     case INT:
+       recordSchema = INTSCHEMA;
+       value = forRow;
+       break;
+     default:
+       throw new AssertionError("I can't generate data for this type");
+   }
+   GenericRecord record = new GenericData.Record(recordSchema);
+   record.put("value", value);
+   return record;
+ }
+
+ /**
+  * Writes a temporary Avro data file with ROWS_IN_INPUT_FILES records of the
+  * given type, using the given user metadata and codec.
+  *
+  * @return the generated file, which is marked for deletion on exit
+  */
+ private File generateData(String file, Type type, Map<String, String> metadata, CodecFactory codec) throws Exception {
+   File target = AvroTestUtil.tempFile(getClass(), file);
+   target.deleteOnExit();
+
+   Schema schema = type.equals(Schema.Type.INT) ? INTSCHEMA : null;
+   if (type.equals(Schema.Type.STRING)) {
+     schema = STRINGSCHEMA;
+   }
+
+   DataFileWriter<Object> dataWriter = new DataFileWriter<Object>(
+       new GenericDatumWriter<Object>(schema));
+   for (Entry<String, String> entry : metadata.entrySet()) {
+     dataWriter.setMeta(entry.getKey(), entry.getValue());
+   }
+   dataWriter.setCodec(codec);
+   dataWriter.create(schema, target);
+
+   int row = 0;
+   while (row < ROWS_IN_INPUT_FILES) {
+     dataWriter.append(aDatum(type, row));
+     row++;
+   }
+   dataWriter.close();
+
+   return target;
+ }
+
+
+ /**
+  * Returns the first field of the first record in the given data file as an int.
+  * The reader is closed even when the file holds no record and
+  * {@code next()} throws.
+  */
+ private int getFirstIntDatum(File file) throws Exception {
+   DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
+       new FileInputStream(file),
+       new GenericDatumReader<GenericRecord>());
+   try {
+     return (Integer) reader.next().get(0);
+   } finally {
+     reader.close();
+   }
+ }
+
+ /**
+  * Counts the records in the given data file.
+  * The reader is closed even when iterating over the file fails.
+  */
+ private int numRowsInFile(File output) throws Exception {
+   DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
+       new FileInputStream(output),
+       new GenericDatumReader<GenericRecord>());
+   try {
+     Iterator<GenericRecord> rows = reader.iterator();
+     int rowcount = 0;
+     while (rows.hasNext()) {
+       rows.next();
+       ++rowcount;
+     }
+     return rowcount;
+   } finally {
+     reader.close();
+   }
+ }
+
+ /**
+  * Concatenates three input files, first named individually and then via
+  * their parent directory, and checks that the limit is honoured both times.
+  */
+ @Test
+ public void testCat() throws Exception {
+   Map<String, String> metadata = new HashMap<String, String>();
+   metadata.put("myMetaKey", "myMetaValue");
+
+   File first = generateData("input1.avro", Type.INT, metadata, DEFLATE);
+   File second = generateData("input2.avro", Type.INT, metadata, SNAPPY);
+   File third = generateData("input3.avro", Type.INT, metadata, DEFLATE);
+
+   File output = AvroTestUtil.tempFile(getClass(), "out/default-output.avro");
+   output.deleteOnExit();
+
+   // inputs given as individual files
+   List<String> fileArgs = asList(
+       first.getAbsolutePath(),
+       second.getAbsolutePath(),
+       third.getAbsolutePath(),
+       "--offset", String.valueOf(OFFSET),
+       "--limit", String.valueOf(LIMIT_WITHIN_INPUT_BOUNDS),
+       "--samplerate", String.valueOf(SAMPLERATE),
+       output.getAbsolutePath());
+   assertEquals(0, new CatTool().run(System.in, System.out, System.err, fileArgs));
+   assertEquals(LIMIT_WITHIN_INPUT_BOUNDS, numRowsInFile(output));
+
+   // inputs given as the directory that contains them
+   List<String> folderArgs = asList(
+       first.getParentFile().getAbsolutePath(),
+       output.getAbsolutePath(),
+       "--offset", String.valueOf(OFFSET),
+       "--limit", String.valueOf(LIMIT_WITHIN_INPUT_BOUNDS));
+   assertEquals(0, new CatTool().run(System.in, System.out, System.err, folderArgs));
+   assertEquals(LIMIT_WITHIN_INPUT_BOUNDS, numRowsInFile(output));
+ }
+
+
+ /** A limit larger than the input yields every record after the offset. */
+ @Test
+ public void testLimitOutOfBounds() throws Exception {
+   Map<String, String> metadata = new HashMap<String, String>();
+   metadata.put("myMetaKey", "myMetaValue");
+
+   File input = generateData("input1.avro", Type.INT, metadata, DEFLATE);
+   File output = AvroTestUtil.tempFile(getClass(), "out/default-output.avro");
+   output.deleteOnExit();
+
+   List<String> args = asList(
+       input.getAbsolutePath(),
+       "--offset=" + String.valueOf(OFFSET),
+       "--limit=" + String.valueOf(LIMIT_OUT_OF_INPUT_BOUNDS),
+       output.getAbsolutePath());
+   assertEquals(0, new CatTool().run(System.in, System.out, System.err, args));
+
+   int expectedRows = ROWS_IN_INPUT_FILES - OFFSET;
+   assertEquals(expectedRows, numRowsInFile(output));
+ }
+
+ /**
+  * The number of sampled records must be within 2 of
+  * (input size - offset) * samplerate.
+  */
+ @Test
+ public void testSamplerateAccuracy() throws Exception {
+   Map<String, String> metadata = new HashMap<String, String>();
+   metadata.put("myMetaKey", "myMetaValue");
+
+   File input1 = generateData("input1.avro", Type.INT, metadata, DEFLATE);
+   File output = AvroTestUtil.tempFile(getClass(), "out/default-output.avro");
+   output.deleteOnExit();
+
+   List<String> args = asList(
+       input1.getAbsolutePath(),
+       output.getAbsolutePath(),
+       "--offset", String.valueOf(OFFSET),
+       "--samplerate", String.valueOf(SAMPLERATE));
+   int returnCode = new CatTool().run(
+       System.in,
+       System.out,
+       System.err,
+       args);
+   assertEquals(0, returnCode);
+
+   // Count once and report both numbers, so a failure explains itself.
+   int actualRows = numRowsInFile(output);
+   double expectedRows = (ROWS_IN_INPUT_FILES - OFFSET) * SAMPLERATE;
+   assertTrue("Outputsize is not roughly (Inputsize - Offset) * samplerate: expected about "
+       + expectedRows + " but was " + actualRows,
+       Math.abs(expectedRows - actualRows) < 2);
+ }
+
+ /** The first record written must be the one located at the offset. */
+ @Test
+ public void testOffSetAccuracy() throws Exception {
+   Map<String, String> metadata = new HashMap<String, String>();
+   metadata.put("myMetaKey", "myMetaValue");
+
+   File input = generateData("input1.avro", Type.INT, metadata, DEFLATE);
+   File output = AvroTestUtil.tempFile(getClass(), "out/default-output.avro");
+   output.deleteOnExit();
+
+   List<String> args = asList(
+       input.getAbsolutePath(),
+       "--offset", String.valueOf(OFFSET),
+       "--limit", String.valueOf(LIMIT_WITHIN_INPUT_BOUNDS),
+       "--samplerate", String.valueOf(SAMPLERATE),
+       output.getAbsolutePath());
+   assertEquals(0, new CatTool().run(System.in, System.out, System.err, args));
+
+   int firstValue = getFirstIntDatum(output);
+   assertEquals("output does not start at offset", OFFSET, firstValue);
+ }
+
+ /** An offset beyond the end of the input produces an empty output file. */
+ @Test
+ public void testOffsetBiggerThanInput() throws Exception {
+   Map<String, String> metadata = new HashMap<String, String>();
+   metadata.put("myMetaKey", "myMetaValue");
+
+   File input = generateData("input1.avro", Type.INT, metadata, DEFLATE);
+   File output = AvroTestUtil.tempFile(getClass(), "out/default-output.avro");
+   output.deleteOnExit();
+
+   String offsetPastEnd = String.valueOf(ROWS_IN_INPUT_FILES + 1);
+   List<String> args = asList(
+       input.getAbsolutePath(),
+       "--offset", offsetPastEnd,
+       output.getAbsolutePath());
+   assertEquals(0, new CatTool().run(System.in, System.out, System.err, args));
+
+   assertEquals("output is not empty", 0, numRowsInFile(output));
+ }
+
+ /**
+  * With a samplerate too small to ever select a second record, the output
+  * must hold exactly one record: the one located at the offset.
+  */
+ @Test
+ public void testSamplerateSmallerThanInput() throws Exception {
+   Map<String, String> metadata = new HashMap<String, String>();
+   metadata.put("myMetaKey", "myMetaValue");
+
+   File input1 = generateData("input1.avro", Type.INT, metadata, DEFLATE);
+   File output = AvroTestUtil.tempFile(getClass(), "out/default-output.avro");
+   output.deleteOnExit();
+
+   List<String> args = asList(
+       input1.getAbsolutePath(),
+       output.getAbsolutePath(),
+       "--offset=" + String.valueOf(OFFSET),
+       "--samplerate=" + String.valueOf(SAMPLERATE_TOO_SMALL));
+   int returnCode = new CatTool().run(
+       System.in,
+       System.out,
+       System.err,
+       args);
+   assertEquals(0, returnCode);
+
+   assertEquals("output should only contain the record at offset",
+       OFFSET, getFirstIntDatum(output));
+   // Checking the first datum alone would also pass with extra records.
+   assertEquals("output should only contain the record at offset",
+       1, numRowsInFile(output));
+ }
+
+
+ /** Inputs whose schemas differ must make the tool fail with an IOException. */
+ @Test(expected = IOException.class)
+ public void testDifferentSchemasFail() throws Exception {
+   Map<String, String> metadata = new HashMap<String, String>();
+   metadata.put("myMetaKey", "myMetaValue");
+
+   File stringInput = generateData("input1.avro", Type.STRING, metadata, DEFLATE);
+   File intInput = generateData("input2.avro", Type.INT, metadata, DEFLATE);
+
+   File output = AvroTestUtil.tempFile(getClass(), "out/default-output.avro");
+   output.deleteOnExit();
+
+   List<String> args = asList(
+       stringInput.getAbsolutePath(),
+       intInput.getAbsolutePath(),
+       output.getAbsolutePath());
+   CatTool tool = new CatTool();
+   tool.run(System.in, System.out, System.err, args);
+ }
+
+ /** Running without arguments succeeds and prints the help text to the given stream. */
+ @Test
+ public void testHelpfulMessageWhenNoArgsGiven() throws Exception {
+   ByteArrayOutputStream captured = new ByteArrayOutputStream(1024);
+   PrintStream out = new PrintStream(captured);
+   List<String> noArgs = Collections.<String>emptyList();
+   int returnCode = new CatTool().run(System.in, out, System.err, noArgs);
+   out.close(); // flushes too
+
+   assertEquals(0, returnCode);
+   String help = captured.toString().trim();
+   assertTrue("should have lots of help", help.length() > 200);
+ }
+}
Propchange: avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java
------------------------------------------------------------------------------
svn:eol-style = native