You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2013/04/27 00:12:16 UTC

svn commit: r1476418 - in /avro/trunk: ./ lang/java/tools/src/main/java/org/apache/avro/tool/ lang/java/tools/src/test/java/org/apache/avro/tool/

Author: cutting
Date: Fri Apr 26 22:12:16 2013
New Revision: 1476418

URL: http://svn.apache.org/r1476418
Log:
AVRO-1307. Java: Add 'cat' tool to append and sample data files.  Contributed by Vincenz Priesnitz.

Added:
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java   (with props)
    avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java   (with props)
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1476418&r1=1476417&r2=1476418&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Fri Apr 26 22:12:16 2013
@@ -4,6 +4,9 @@ Trunk (not yet released)
 
   NEW FEATURES
 
+    AVRO-1307. Java: Add 'cat' tool to append and sample data files.
+    (Vincenz Priesnitz via cutting)
+
   IMPROVEMENTS
 
     AVRO-1260. Ruby: Improve read performance. (Martin Kleppmann via cutting)

Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java?rev=1476418&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java Fri Apr 26 22:12:16 2013
@@ -0,0 +1,310 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileConstants;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Tool to copy records from one or more Avro data files into a single new
+ * data file, optionally skipping leading records, capping the number of
+ * records written and sampling the input at a fixed rate.
+ */
+public class CatTool implements Tool {
+
+  /** Number of records appended to the output during the current run. */
+  private long totalCopied;
+  /** Sampling accumulator; a record is taken whenever it reaches 1. */
+  private double sampleCounter;
+
+  private GenericRecord reuse;
+  private DataFileStream<GenericRecord> reader;
+  private DataFileWriter<GenericRecord> writer;
+  /** Schema of the first input; all later inputs must equal it. */
+  private Schema schema;
+  private List<Path> inFiles;
+  /** Index into inFiles of the file currently being read. */
+  private int currentInput;
+
+  /**
+   * Parses the options, then copies the selected records from the input
+   * files to the output.
+   *
+   * @param in not used by this tool
+   * @param out receives the help text and, when the output argument is
+   *          "-", the data file itself
+   * @param err receives error and progress messages, which keeps them out
+   *          of a data file that is written to {@code out}
+   * @param args options, then the input files or directories, then the
+   *          output file as the last argument
+   * @return 0 on success or when only the help text was printed, 1 when an
+   *         option value is invalid or no input file was found
+   * @throws Exception if reading or writing fails, or if the input files
+   *           do not all have the same schema
+   */
+  @Override
+  public int run(InputStream in, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+    OptionParser optParser = new OptionParser();
+    OptionSpec<Long> offsetOpt = optParser
+      .accepts("offset", "offset for reading input")
+      .withRequiredArg()
+      .ofType(Long.class)
+      .defaultsTo(Long.valueOf(0));
+    OptionSpec<Long> limitOpt = optParser
+      .accepts("limit", "maximum number of records in the outputfile")
+      .withRequiredArg()
+      .ofType(Long.class)
+      .defaultsTo(Long.MAX_VALUE);
+    OptionSpec<Double> fracOpt = optParser
+      .accepts("samplerate", "rate at which records will be collected")
+      .withRequiredArg()
+      .ofType(Double.class)
+      .defaultsTo(Double.valueOf(1));
+
+    OptionSet opts = optParser.parse(args.toArray(new String[0]));
+    List<String> nargs = opts.nonOptionArguments();
+    if (nargs.size() < 2) {
+      printHelp(out);
+      return 0;
+    }
+
+    // Validate the option values before any file is opened or created, so
+    // that a bad value neither leaks streams nor creates the output file.
+    long offset = opts.valueOf(offsetOpt);
+    long limit = opts.valueOf(limitOpt);
+    double samplerate = opts.valueOf(fracOpt);
+    if (limit < 0) {
+      err.println("limit has to be non-negative");
+      printHelp(out);
+      return 1;
+    }
+    if (offset < 0) {
+      err.println("offset has to be non-negative");
+      printHelp(out);
+      return 1;
+    }
+    // negated range check, so that NaN is rejected as well
+    if (!(samplerate >= 0 && samplerate <= 1)) {
+      err.println("samplerate has to be a number between 0 and 1");
+      printHelp(out);
+      return 1;
+    }
+
+    inFiles = Util.getFiles(nargs.subList(0, nargs.size()-1));
+    if (inFiles.isEmpty()) {  // e.g. only empty directories were given
+      err.println("no input files found");
+      return 1;
+    }
+    err.println("List of input files:");
+    for (Path p : inFiles) {
+      err.println(p);
+    }
+
+    // reset the per-run state, so that an instance can be run repeatedly
+    currentInput = -1;
+    reader = null;
+    schema = null;
+    reuse = null;
+    sampleCounter = 1;  // makes sure the first record read is taken
+    totalCopied = 0;
+
+    OutputStream output = null;
+    boolean writerOpen = false;
+    try {
+      nextInput();
+
+      String lastArg = nargs.get(nargs.size()-1);
+      if (lastArg.equals("-")) {
+        output = out;
+      } else {
+        output = Util.createFromFS(lastArg);
+      }
+      writer = new DataFileWriter<GenericRecord>(
+          new GenericDatumWriter<GenericRecord>());
+
+      // codec and user metadata of the output are taken from the first input
+      String codecName = reader.getMetaString(DataFileConstants.CODEC);
+      CodecFactory codec = (codecName == null)
+          ? CodecFactory.fromString(DataFileConstants.NULL_CODEC)
+          : CodecFactory.fromString(codecName);
+      writer.setCodec(codec);
+      for (String key : reader.getMetaKeys()) {
+        if (!DataFileWriter.isReservedMeta(key)) {
+          writer.setMeta(key, reader.getMeta(key));
+        }
+      }
+      writer.create(schema, output);
+      writerOpen = true;
+
+      skip(offset);
+      writeRecords(limit, samplerate);
+      err.println(totalCopied + " records written.");
+      writer.flush();
+    } finally {
+      closeStreams(output, out, writerOpen);
+    }
+    Util.close(out);
+    return 0;
+  }
+
+  /**
+   * Closes the writer or, when it was never opened, the output stream
+   * created for it, and then the current reader.
+   */
+  private void closeStreams(OutputStream output, PrintStream out,
+      boolean writerOpen) throws IOException {
+    try {
+      if (writerOpen) {
+        writer.close();
+      } else if (output != null && output != out) {
+        output.close();
+      }
+    } finally {
+      if (reader != null) {
+        reader.close();
+        reader = null;
+      }
+    }
+  }
+
+  /**
+   * Closes the current input, if any, and opens the next file of inFiles.
+   * The schema of the first file is remembered; every later file has to
+   * have an equal schema.
+   */
+  private void nextInput() throws IOException{
+    if (reader != null) {  // do not keep exhausted inputs open
+      reader.close();
+      reader = null;
+    }
+    currentInput++;
+    Path path = inFiles.get(currentInput);
+    FSDataInputStream input = new FSDataInputStream(Util.openFromFS(path));
+    boolean opened = false;
+    try {
+      reader = new DataFileStream<GenericRecord>(input, new GenericDatumReader<GenericRecord>());
+      opened = true;
+    } finally {
+      if (!opened) {  // the stream would otherwise have no owner left
+        input.close();
+      }
+    }
+    if (schema == null) {                            // if this is the first file, the schema gets saved
+      schema = reader.getSchema();
+    }
+    else if (!schema.equals(reader.getSchema())) {   // subsequent files have to have equal schemas
+      throw new IOException("schemas dont match");
+    }
+  }
+
+  /** Tells whether inFiles holds a file after the current one. */
+  private boolean hasNextInput() {
+    return inFiles.size() > (currentInput + 1);
+  }
+
+  /**
+   * Reads and drops records, moving on to the following input files when
+   * the current one is exhausted.
+   * Iterative, so that the stack depth does not grow with the file count.
+   * @param skip the number of records to drop
+   * @return the number of records dropped, which is smaller than skip when
+   *         the inputs hold fewer records
+   */
+  private long skip(long skip) throws IOException {
+    long skipped = 0;
+    while (true) {
+      while (skipped < skip && reader.hasNext()) {
+        reuse = reader.next(reuse);
+        skipped++;
+      }
+      if (skipped >= skip || !hasNextInput()) {
+        return skipped;
+      }
+      nextInput();  // goto next file
+    }
+  }
+
+  /**
+   * Copies records to the writer at the given samplerate, moving on to the
+   * following input files when the current one is exhausted.
+   * The first record read is guaranteed to be taken.
+   * @param count the maximum number of records to write
+   * @param samplerate the fraction of the records read that is written
+   * @return the number of records written
+   */
+  private long writeRecords(long count, double samplerate) throws IOException {
+    long written = 0;
+    while (true) {
+      while (written < count && reader.hasNext()) {
+        reuse = reader.next(reuse);
+        sampleCounter = sampleCounter + samplerate;
+        if (sampleCounter >= 1) {
+          writer.append(reuse);
+          written++;
+          sampleCounter--;
+        }
+      }
+      if (written >= count || !hasNextInput()) {
+        break;
+      }
+      nextInput();  // goto next file
+    }
+    totalCopied = totalCopied + written;
+    return written;
+  }
+
+  private void printHelp(PrintStream out) {
+    out.println("cat --offset <offset> --limit <limit> --samplerate <samplerate> [input-files...] output-file");
+    out.println();
+    out.println("extracts records from a list of input files into a new file.");
+    out.println("--offset      start of the extract");
+    out.println("--limit       maximum number of records in the output file.");
+    out.println("--samplerate  rate at which records will be collected");
+    out.println("A dash ('-') can be given to direct output to stdout");
+  }
+
+  @Override
+  public String getName() {
+    return "cat";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "extracts samples from files";
+  }
+
+}

Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CatTool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java?rev=1476418&r1=1476417&r2=1476418&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java (original)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java Fri Apr 26 22:12:16 2013
@@ -35,6 +35,7 @@ public class Main {
   Main() {
     tools = new TreeMap<String, Tool>();
     for (Tool tool : new Tool[] {
+        new CatTool(),
         new SpecificCompilerTool(),
         new InduceSchemaTool(),
         new JsonToBinaryFragmentTool(),

Modified: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java?rev=1476418&r1=1476417&r2=1476418&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java (original)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Util.java Fri Apr 26 22:12:16 2013
@@ -23,6 +23,9 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 
 import org.apache.avro.Schema;
 import org.apache.avro.file.DataFileReader;
@@ -30,6 +33,8 @@ import org.apache.avro.generic.GenericDa
 import org.apache.avro.io.DecoderFactory;
 import org.apache.avro.mapred.FsInput;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
 /** Static utility methods for tools. */
@@ -75,6 +80,17 @@ class Util {
   }
   
   /**
+   * Returns an InputStream for the file using the owning filesystem,
+   * or the default if none is given.
+   * @param filename The filename to be opened
+   * @throws IOException 
+   */
+  static InputStream openFromFS(Path filename) 
+      throws IOException {
+    return filename.getFileSystem(new Configuration()).open(filename);
+  }
+  
+  /**
    * Returns a seekable FsInput using the owning filesystem, 
    * or the default if none is given.
    * @param filename The filename to be opened
@@ -128,6 +144,54 @@ class Util {
     }
   }
   
+  /**
+   * Expands a file or directory name into the files it stands for.
+   * A file results in a list holding just the Path of that file. A directory
+   * results in the Paths of the files directly inside it; subdirectories and
+   * the files below them are not included.
+   * The list is sorted by the natural ordering of Path.
+   * @param fileOrDirName filename or directoryname
+   * @return A Path List 
+   * @throws IOException if a filesystem operation fails
+   */
+  static List<Path> getFiles(String fileOrDirName) throws IOException {
+    final Path target = new Path(fileOrDirName);
+    final FileSystem owner = target.getFileSystem(new Configuration());
+    final List<Path> found = new ArrayList<Path>();
+
+    if (owner.isFile(target)) {
+      found.add(target);
+    } else if (owner.getFileStatus(target).isDir()) {
+      // take the plain files of the directory, leave nested directories out
+      for (FileStatus entry : owner.listStatus(target)) {
+        if (entry.isDir()) {
+          continue;
+        }
+        found.add(entry.getPath());
+      }
+    }
+    Collections.sort(found);
+    return found;
+  }
+  
+  /**
+   * Expands every given name with getFiles(String) and merges the results,
+   * so a directory contributes the files directly inside it, but no
+   * subdirectories or files within those. The merged list is sorted.
+   * @param fileOrDirNames A list of filenames and directorynames
+   * @return A list of Paths, one for each file 
+   * @throws IOException if a filesystem operation fails
+   */
+  static List<Path> getFiles(List<String> fileOrDirNames) throws IOException {
+    final ArrayList<Path> merged = new ArrayList<Path>();
+    for (String fileOrDirName : fileOrDirNames) {
+      List<Path> expanded = getFiles(fileOrDirName);
+      merged.addAll(expanded);
+    }
+    Collections.sort(merged);
+    return merged;
+  }
+  
   /** 
    * Converts a String JSON object into a generic datum.
    * 

Added: avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java?rev=1476418&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java (added)
+++ avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java Fri Apr 26 22:12:16 2013
@@ -0,0 +1,321 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.avro.AvroTestUtil;
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.junit.Test;
+
+/** Tests for {@link CatTool}. */
+public class TestCatTool {
+  private static final int ROWS_IN_INPUT_FILES = 100000;
+  private static final int OFFSET = 1000;
+  private static final int LIMIT_WITHIN_INPUT_BOUNDS = 100;
+  private static final int LIMIT_OUT_OF_INPUT_BOUNDS = 100001;
+  private static final double SAMPLERATE = .01;
+  private static final double SAMPLERATE_TOO_SMALL = .00000001;
+
+  private static final Schema INTSCHEMA = new Schema.Parser().parse(
+    "{\"type\":\"record\", " +
+    "\"name\":\"myRecord\", " +
+    "\"fields\":[ " +
+    "{\"name\":\"value\",\"type\":\"int\"} " +
+    "]}");
+  private static final Schema STRINGSCHEMA = new Schema.Parser().parse(
+    "{\"type\":\"record\", " +
+    "\"name\":\"myRecord\", " +
+    "\"fields\":[ {\"name\":\"value\",\"type\":\"string\"} " +
+    "]}");
+  private static final CodecFactory DEFLATE = CodecFactory.deflateCodec(9);
+  private static final CodecFactory SNAPPY = CodecFactory.snappyCodec();
+
+  /** Returns the record schema used for test data of the given type. */
+  private static Schema schemaFor(Type type) {
+    switch (type) {
+      case STRING:
+        return STRINGSCHEMA;
+      case INT:
+        return INTSCHEMA;
+      default:
+        throw new AssertionError("I can't generate data for this type");
+    }
+  }
+
+  /** Builds the record for the given row of a test file of the given type. */
+  private GenericRecord aDatum(Type ofType, int forRow) {
+    GenericRecord record = new GenericData.Record(schemaFor(ofType));
+    if (ofType == Type.STRING) {
+      record.put("value", String.valueOf(forRow % 100));
+    } else {
+      record.put("value", forRow);
+    }
+    return record;
+  }
+
+  /**
+   * Writes a data file of ROWS_IN_INPUT_FILES records, where row i holds
+   * the value produced by aDatum for i.
+   */
+  private File generateData(String file, Type type, Map<String, String> metadata, CodecFactory codec) throws Exception {
+    File inputFile = AvroTestUtil.tempFile(getClass(), file);
+    inputFile.deleteOnExit();
+
+    Schema schema = schemaFor(type);  // fails early for unsupported types
+    DataFileWriter<Object> writer = new DataFileWriter<Object>(
+              new GenericDatumWriter<Object>(schema));
+    try {
+      for(Entry<String, String> metadatum : metadata.entrySet()) {
+        writer.setMeta(metadatum.getKey(), metadatum.getValue());
+      }
+      writer.setCodec(codec);
+      writer.create(schema, inputFile);
+
+      for (int i = 0; i < ROWS_IN_INPUT_FILES; i++) {
+        writer.append(aDatum(type, i));
+      }
+    } finally {
+      writer.close();
+    }
+
+    return inputFile;
+  }
+
+  /** Creates the metadata that is attached to every generated input file. */
+  private static Map<String, String> someMetadata() {
+    Map<String, String> metadata = new HashMap<String, String>();
+    metadata.put("myMetaKey", "myMetaValue");
+    return metadata;
+  }
+
+  /** Returns the file that the tool under test writes its output to. */
+  private File outputFile() {
+    File output = AvroTestUtil.tempFile(getClass(), "out/default-output.avro");
+    output.deleteOnExit();
+    return output;
+  }
+
+  /** Runs a fresh CatTool on the standard streams and returns its exit code. */
+  private static int runCat(List<String> args) throws Exception {
+    return new CatTool().run(System.in, System.out, System.err, args);
+  }
+
+  /** Reads the value of field 0 of the first record of the given data file. */
+  private int getFirstIntDatum(File file) throws Exception {
+    DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
+      new FileInputStream(file),
+      new GenericDatumReader<GenericRecord>());
+    try {
+      return (Integer) reader.next().get(0);
+    } finally {
+      reader.close();
+    }
+  }
+
+  /** Counts the records of the given data file. */
+  private int numRowsInFile(File output) throws Exception {
+    DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
+      new FileInputStream(output),
+      new GenericDatumReader<GenericRecord>());
+    try {
+      Iterator<GenericRecord> rows = reader.iterator();
+      int rowcount = 0;
+      while(rows.hasNext()) {
+        ++rowcount;
+        rows.next();
+      }
+      return rowcount;
+    } finally {
+      reader.close();
+    }
+  }
+
+  /** Concatenates several files, given one by one and as a directory. */
+  @Test
+  public void testCat() throws Exception {
+    Map<String, String> metadata = someMetadata();
+
+    File input1 = generateData("input1.avro", Type.INT, metadata, DEFLATE);
+    File input2 = generateData("input2.avro", Type.INT, metadata, SNAPPY);
+    File input3 = generateData("input3.avro", Type.INT, metadata, DEFLATE);
+
+    File output = outputFile();
+
+//    file input
+    List<String> args = asList(
+      input1.getAbsolutePath(),
+      input2.getAbsolutePath(),
+      input3.getAbsolutePath(),
+      "--offset" , String.valueOf(OFFSET),
+      "--limit" , String.valueOf(LIMIT_WITHIN_INPUT_BOUNDS),
+      "--samplerate" , String.valueOf(SAMPLERATE),
+      output.getAbsolutePath());
+    assertEquals(0, runCat(args));
+    assertEquals(LIMIT_WITHIN_INPUT_BOUNDS, numRowsInFile(output));
+
+//    folder input
+    args = asList(
+      input1.getParentFile().getAbsolutePath(),
+      output.getAbsolutePath(),
+      "--offset" , String.valueOf(OFFSET),
+      "--limit" , String.valueOf(LIMIT_WITHIN_INPUT_BOUNDS));
+    assertEquals(0, runCat(args));
+    assertEquals(LIMIT_WITHIN_INPUT_BOUNDS, numRowsInFile(output));
+  }
+
+  /** A limit beyond the input size yields all records after the offset. */
+  @Test
+  public void testLimitOutOfBounds() throws Exception {
+    File input1 = generateData("input1.avro", Type.INT, someMetadata(), DEFLATE);
+    File output = outputFile();
+
+    List<String> args = asList(
+      input1.getAbsolutePath(),
+      "--offset=" + String.valueOf(OFFSET),
+      "--limit=" + String.valueOf(LIMIT_OUT_OF_INPUT_BOUNDS),
+      output.getAbsolutePath());
+    assertEquals(0, runCat(args));
+    assertEquals(ROWS_IN_INPUT_FILES - OFFSET, numRowsInFile(output));
+  }
+
+  /** The output size is about the sampled share of the records read. */
+  @Test
+  public void testSamplerateAccuracy() throws Exception {
+    File input1 = generateData("input1.avro", Type.INT, someMetadata(), DEFLATE);
+    File output = outputFile();
+
+    List<String>args = asList(
+      input1.getAbsolutePath(),
+      output.getAbsolutePath(),
+      "--offset" , String.valueOf(OFFSET),
+      "--samplerate" , String.valueOf(SAMPLERATE));
+    assertEquals(0, runCat(args));
+
+    double expected = (ROWS_IN_INPUT_FILES - OFFSET)*SAMPLERATE;
+    assertTrue("Outputsize is not roughly (Inputsize - Offset) * samplerate",
+      Math.abs(expected - numRowsInFile(output)) < 2);
+  }
+
+  /** The first record of the output is the one at the offset. */
+  @Test
+  public void testOffSetAccuracy() throws Exception {
+    File input1 = generateData("input1.avro", Type.INT, someMetadata(), DEFLATE);
+    File output = outputFile();
+
+    List<String> args = asList(
+      input1.getAbsolutePath(),
+      "--offset" , String.valueOf(OFFSET),
+      "--limit" , String.valueOf(LIMIT_WITHIN_INPUT_BOUNDS),
+      "--samplerate" , String.valueOf(SAMPLERATE),
+      output.getAbsolutePath());
+    assertEquals(0, runCat(args));
+    assertEquals("output does not start at offset",
+      OFFSET, getFirstIntDatum(output));
+  }
+
+  /** An offset beyond the input size yields an empty output file. */
+  @Test
+  public void testOffsetBiggerThanInput() throws Exception{
+    File input1 = generateData("input1.avro", Type.INT, someMetadata(), DEFLATE);
+    File output = outputFile();
+
+    List<String> args = asList(
+      input1.getAbsolutePath(),
+      "--offset" , String.valueOf(ROWS_IN_INPUT_FILES + 1),
+      output.getAbsolutePath());
+    assertEquals(0, runCat(args));
+    assertEquals("output is not empty",
+      0, numRowsInFile(output));
+  }
+
+  /** A tiny samplerate still takes the record at the offset, but no other. */
+  @Test
+  public void testSamplerateSmallerThanInput() throws Exception{
+    File input1 = generateData("input1.avro", Type.INT, someMetadata(), DEFLATE);
+    File output = outputFile();
+
+    List<String> args = asList(
+      input1.getAbsolutePath(),
+      output.getAbsolutePath(),
+      "--offset=" + String.valueOf(OFFSET),
+      "--samplerate=" + String.valueOf(SAMPLERATE_TOO_SMALL));
+    assertEquals(0, runCat(args));
+
+    assertEquals("output should only contain the record at offset",
+      OFFSET, getFirstIntDatum(output));
+    assertEquals("output should only contain the record at offset",
+      1, numRowsInFile(output));
+  }
+
+  /** Input files with different schemas are rejected. */
+  @Test(expected = IOException.class)
+  public void testDifferentSchemasFail() throws Exception {
+    Map<String, String> metadata = someMetadata();
+
+    File input1 = generateData("input1.avro", Type.STRING, metadata, DEFLATE);
+    File input2 = generateData("input2.avro", Type.INT, metadata, DEFLATE);
+    File output = outputFile();
+
+    List<String> args = asList(
+      input1.getAbsolutePath(),
+      input2.getAbsolutePath(),
+      output.getAbsolutePath());
+    runCat(args);
+  }
+
+  /** Without arguments the tool prints its help text to out. */
+  @Test
+  public void testHelpfulMessageWhenNoArgsGiven() throws Exception {
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream(1024);
+    PrintStream out = new PrintStream(buffer);
+    int returnCode = new CatTool().run(
+      System.in,
+      out,
+      System.err,
+      Collections.<String>emptyList());
+    out.close(); // flushes too
+
+    assertEquals(0, returnCode);
+    assertTrue(
+      "should have lots of help",
+      buffer.toString().trim().length() > 200);
+  }
+}

Propchange: avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCatTool.java
------------------------------------------------------------------------------
    svn:eol-style = native