You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2012/09/11 23:35:59 UTC

svn commit: r1383626 [1/3] - in /avro/trunk: ./ doc/src/content/xdocs/ lang/java/ lang/java/tools/ lang/java/tools/src/main/java/org/apache/avro/tool/ lang/java/trevni/ lang/java/trevni/avro/ lang/java/trevni/avro/src/ lang/java/trevni/avro/src/main/ l...

Author: cutting
Date: Tue Sep 11 21:35:56 2012
New Revision: 1383626

URL: http://svn.apache.org/viewvc?rev=1383626&view=rev
Log:
AVRO-806.  Add specification of the Trevni columnar file format and a Java implementation of it.

Added:
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java   (with props)
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java   (with props)
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java   (with props)
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java   (with props)
    avro/trunk/lang/java/trevni/   (with props)
    avro/trunk/lang/java/trevni/avro/   (with props)
    avro/trunk/lang/java/trevni/avro/pom.xml   (with props)
    avro/trunk/lang/java/trevni/avro/src/
    avro/trunk/lang/java/trevni/avro/src/main/
    avro/trunk/lang/java/trevni/avro/src/main/java/
    avro/trunk/lang/java/trevni/avro/src/main/java/org/
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html   (with props)
    avro/trunk/lang/java/trevni/avro/src/test/
    avro/trunk/lang/java/trevni/avro/src/test/cases/
    avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/
    avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc
    avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json
    avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/
    avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc
    avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json
    avro/trunk/lang/java/trevni/avro/src/test/java/
    avro/trunk/lang/java/trevni/avro/src/test/java/org/
    avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/
    avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/
    avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/
    avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestWordCount.java   (with props)
    avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/WordCountUtil.java   (with props)
    avro/trunk/lang/java/trevni/checkstyle.xml   (with props)
    avro/trunk/lang/java/trevni/core/   (with props)
    avro/trunk/lang/java/trevni/core/pom.xml   (with props)
    avro/trunk/lang/java/trevni/core/src/
    avro/trunk/lang/java/trevni/core/src/main/
    avro/trunk/lang/java/trevni/core/src/main/java/
    avro/trunk/lang/java/trevni/core/src/main/java/org/
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BlockDescriptor.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Checksum.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnDescriptor.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileMetaData.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnMetaData.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnValues.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Crc32Checksum.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/DeflateCodec.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Input.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html   (with props)
    avro/trunk/lang/java/trevni/core/src/main/java/overview.html   (with props)
    avro/trunk/lang/java/trevni/core/src/test/
    avro/trunk/lang/java/trevni/core/src/test/java/
    avro/trunk/lang/java/trevni/core/src/test/java/org/
    avro/trunk/lang/java/trevni/core/src/test/java/org/apache/
    avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/
    avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java   (with props)
    avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java   (with props)
    avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java   (with props)
    avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java   (with props)
    avro/trunk/lang/java/trevni/doc/   (with props)
    avro/trunk/lang/java/trevni/doc/apt/
    avro/trunk/lang/java/trevni/doc/apt/spec.apt   (with props)
    avro/trunk/lang/java/trevni/doc/pom.xml   (with props)
    avro/trunk/lang/java/trevni/doc/resources/
    avro/trunk/lang/java/trevni/doc/resources/css/
    avro/trunk/lang/java/trevni/doc/resources/css/site.css   (with props)
    avro/trunk/lang/java/trevni/doc/site.xml   (with props)
    avro/trunk/lang/java/trevni/pom.xml   (with props)
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/build.sh
    avro/trunk/doc/src/content/xdocs/site.xml
    avro/trunk/lang/java/pom.xml
    avro/trunk/lang/java/tools/pom.xml
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java
    avro/trunk/pom.xml

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Sep 11 21:35:56 2012
@@ -4,6 +4,9 @@ Avro 1.7.2 (unreleased)
 
   NEW FEATURES
 
+    AVRO-806.  Add specification of the Trevni columnar file format
+    and a Java implementation of it. (cutting)
+
   IMPROVEMENTS
 
     AVRO-1146. Java: Serialize several built-in Java classes as

Modified: avro/trunk/build.sh
URL: http://svn.apache.org/viewvc/avro/trunk/build.sh?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/build.sh (original)
+++ avro/trunk/build.sh Tue Sep 11 21:35:56 2012
@@ -94,6 +94,7 @@ case "$target" in
 	# build lang-specific artifacts
         
 	(cd lang/java; mvn -P dist package -DskipTests -Davro.version=$VERSION javadoc:aggregate) 
+        (cd lang/java/trevni/doc; mvn site)
         (mvn -N -P copy-artifacts antrun:run) 
 
 	(cd lang/py; ant dist)

Modified: avro/trunk/doc/src/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/avro/trunk/doc/src/content/xdocs/site.xml?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/doc/src/content/xdocs/site.xml (original)
+++ avro/trunk/doc/src/content/xdocs/site.xml Tue Sep 11 21:35:56 2012
@@ -42,6 +42,7 @@ See http://forrest.apache.org/docs/linki
   <docs label="Documentation"> 
     <overview   label="Overview"          href="index.html" />
     <spec       label="Specification"     href="spec.html" />
+    <trevni     label="Trevni"            href="ext:trevni/spec" />
     <java-api   label="Java API"          href="ext:api/java/index" />
     <c-api      label="C API"             href="ext:api/c/index" />
     <cpp-api    label="C++ API"           href="ext:api/cpp/index" />
@@ -79,6 +80,9 @@ See http://forrest.apache.org/docs/linki
 	<index href="index.html" />
       </java>
     </api>
+    <trevni href="trevni/">
+      <spec href="spec.html"/>
+    </trevni>
   </external-refs>
  
 </site>

Modified: avro/trunk/lang/java/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/pom.xml?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/lang/java/pom.xml (original)
+++ avro/trunk/lang/java/pom.xml Tue Sep 11 21:35:56 2012
@@ -63,6 +63,7 @@
     <jar-plugin.version>2.3.2</jar-plugin.version>
     <javacc-plugin.version>2.6</javacc-plugin.version>
     <javadoc-plugin.version>2.8</javadoc-plugin.version>
+    <maven-site-plugin.version>3.1</maven-site-plugin.version>
     <plugin-plugin.version>2.9</plugin-plugin.version>
     <source-plugin.version>2.1.2</source-plugin.version>
     <surefire-plugin.version>2.12</surefire-plugin.version>
@@ -76,6 +77,7 @@
     <module>compiler</module>
     <module>maven-plugin</module>
     <module>ipc</module>
+    <module>trevni</module>
     <module>tools</module>
     <module>mapred</module>
     <module>protobuf</module>

Modified: avro/trunk/lang/java/tools/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/pom.xml?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/pom.xml (original)
+++ avro/trunk/lang/java/tools/pom.xml Tue Sep 11 21:35:56 2012
@@ -110,6 +110,28 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
+      <groupId>org.apache.trevni</groupId>
+      <artifactId>trevni-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.trevni</groupId>
+      <artifactId>trevni-avro</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.trevni</groupId>
+      <artifactId>trevni-core</artifactId>
+      <classifier>tests</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.trevni</groupId>
+      <artifactId>trevni-avro</artifactId>
+      <classifier>tests</classifier>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.codehaus.jackson</groupId>
       <artifactId>jackson-core-asl</artifactId>
       <version>${jackson.version}</version>

Modified: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java (original)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java Tue Sep 11 21:35:56 2012
@@ -49,7 +49,10 @@ public class Main {
         new RpcSendTool(),
         new FromTextTool(),
         new ToTextTool(),
-        new TetherTool()
+        new TetherTool(),
+        new TrevniCreateRandomTool(),
+        new TrevniMetadataTool(),
+        new TrevniToJsonTool()
         }) {
       Tool prev = tools.put(tool.getName(), tool);
       if (prev != null) {

Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.trevni.ColumnFileMetaData;
+import org.apache.trevni.avro.AvroColumnWriter;
+import org.apache.trevni.avro.RandomData;
+
+/**
+ * Tool that writes a Trevni file populated with randomly generated
+ * instances of an Avro schema.
+ */
+public class TrevniCreateRandomTool implements Tool {
+
+  /** Name under which this tool is registered. */
+  @Override
+  public String getName() {
+    return "trevni_random";
+  }
+
+  /** One-line description of this tool. */
+  @Override
+  public String getShortDescription() {
+    return "Create a Trevni file filled with random instances of a schema.";
+  }
+
+  /**
+   * Expects exactly three arguments: the schema file, the number of
+   * instances to generate, and the output file.
+   *
+   * @return 0 once the output file has been written, or 1 after printing a
+   * usage message to <code>err</code> when the argument count is not three
+   */
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+                 List<String> args) throws Exception {
+    final boolean wellFormed = args.size() == 3;
+    if (wellFormed) {
+      final File avsc = new File(args.get(0));
+      final int instances = Integer.parseInt(args.get(1));
+      final File target = new File(args.get(2));
+
+      final Schema parsed = Schema.parse(avsc);
+      final AvroColumnWriter<Object> columns =
+        new AvroColumnWriter<Object>(parsed, new ColumnFileMetaData());
+
+      // every generated instance is handed to the writer first; the output
+      // file is only produced by the final writeTo() call
+      for (Object instance : new RandomData(parsed, instances)) {
+        columns.write(instance);
+      }
+      columns.writeTo(target);
+      return 0;
+    }
+
+    err.println("Usage: schemaFile count outputFile");
+    return 1;
+  }
+}

Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.trevni.Input;
+import org.apache.trevni.ColumnFileReader;
+import org.apache.trevni.MetaData;
+import org.apache.trevni.ColumnMetaData;
+
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonGenerator;
+import org.codehaus.jackson.JsonEncoding;
+import org.codehaus.jackson.util.MinimalPrettyPrinter;
+
+/** Tool to print Trevni file metadata as JSON. */
+public class TrevniMetadataTool implements Tool {
+  static final JsonFactory FACTORY = new JsonFactory();
+
+  // Per-invocation state: reassigned by every call to dump(Input, ...) and
+  // read by the private dump(MetaData) helper.
+  private JsonGenerator generator;
+  private ColumnFileReader reader;
+
+  /** Name under which this tool is registered. */
+  @Override
+  public String getName() {
+    return "trevni_meta";
+  }
+
+  /** One-line description of this tool. */
+  @Override
+  public String getShortDescription() {
+    return "Dumps a Trevni file's metadata as JSON.";
+  }
+
+  /**
+   * Accepts either <code>input</code> or <code>-pretty input</code>.
+   *
+   * @return 0 after the metadata has been printed to <code>out</code>, or 1
+   * after printing a usage message to <code>err</code> for any other
+   * argument list
+   */
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+                 List<String> args) throws Exception {
+    String filename;
+    boolean pretty = false;
+    if (args.size() == 2 && "-pretty".equals(args.get(0))) {
+      pretty = true;
+      filename = args.get(1);
+    } else if (args.size() == 1) {
+      filename = args.get(0);
+    } else {
+      err.println("Usage: [-pretty] input");
+      return 1;
+    }
+
+    dump(TrevniUtil.input(filename), out, pretty);
+
+    return 0;
+  }
+
+  /**
+   * Read a Trevni file and print its metadata as a single JSON object with
+   * the fields "rowCount", "columnCount", "metadata" (the file-level
+   * metadata) and "columns" (an array holding one metadata object per
+   * column).
+   *
+   * @param input the Trevni file to read
+   * @param out where the JSON is written, followed by a line terminator
+   * @param pretty whether to use Jackson's default pretty printer
+   * @throws IOException if reading the file or writing the JSON fails; the
+   * reader opened on <code>input</code> is closed in either case
+   */
+  public void dump(Input input, PrintStream out, boolean pretty)
+    throws IOException {
+    this.generator = FACTORY.createJsonGenerator(out, JsonEncoding.UTF8);
+    if (pretty) {
+      generator.useDefaultPrettyPrinter();
+    } else {                                      // ensure newline separation
+      MinimalPrettyPrinter pp = new MinimalPrettyPrinter();
+      pp.setRootValueSeparator(System.getProperty("line.separator"));
+      generator.setPrettyPrinter(pp);
+    }
+
+    this.reader = new ColumnFileReader(input);
+    try {
+      generator.writeStartObject();
+      generator.writeNumberField("rowCount", reader.getRowCount());
+      generator.writeNumberField("columnCount", reader.getColumnCount());
+
+      generator.writeFieldName("metadata");
+      dump(reader.getMetaData());
+
+      generator.writeFieldName("columns");
+      generator.writeStartArray();
+      for (ColumnMetaData c : reader.getColumnMetaData())
+        dump(c);
+      generator.writeEndArray();
+
+      generator.writeEndObject();
+
+      generator.flush();
+      out.println();
+    } finally {
+      // release the reader even when JSON generation fails part-way
+      reader.close();
+    }
+  }
+
+  /**
+   * Write one metadata map as a JSON object of string fields.  Values are
+   * raw bytes, decoded here as ISO-8859-1.
+   */
+  private void dump(MetaData<?> meta) throws IOException {
+    generator.writeStartObject();
+    for (Map.Entry<String,byte[]> e : meta.entrySet())
+      generator.writeStringField(e.getKey(),
+                                 new String(e.getValue(), "ISO-8859-1"));
+    generator.writeEndObject();
+  }
+
+}

Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.trevni.Input;
+import org.apache.trevni.ColumnFileReader;
+import org.apache.trevni.ColumnMetaData;
+import org.apache.trevni.ColumnValues;
+
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonGenerator;
+import org.codehaus.jackson.JsonEncoding;
+import org.codehaus.jackson.util.MinimalPrettyPrinter;
+
+/** Tool to read Trevni files and print them as JSON.
+ * This can read any Trevni file.  Nested structure is reconstructed from the
+ * columns rather than any schema information.
+ */
+public class TrevniToJsonTool implements Tool {
+  static final JsonFactory FACTORY = new JsonFactory();
+
+  private JsonGenerator generator;
+  private ColumnFileReader reader;
+  private ColumnValues[] values;
+  private String[] shortNames;
+
+  @Override
+  public String getName() {
+    return "trevni_tojson";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Dumps a Trevni file as JSON.";
+  }
+
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+                 List<String> args) throws Exception {
+    String filename;
+    boolean pretty = false;
+    if (args.size() == 2 && "-pretty".equals(args.get(0))) {
+      pretty = true;
+      filename = args.get(1);
+    } else if (args.size() == 1) {
+      filename = args.get(0);
+    } else {
+      err.println("Usage: [-pretty] input");
+      return 1;
+    }
+    
+    toJson(TrevniUtil.input(filename), out, pretty);
+
+    return 0;
+  }
+
+  /** Read a Trevni file and print each row as a JSON object. */
+  public void toJson(Input input, PrintStream out, boolean pretty)
+    throws IOException {
+    this.generator = FACTORY.createJsonGenerator(out, JsonEncoding.UTF8);
+    if (pretty) {
+      generator.useDefaultPrettyPrinter();
+    } else {                                      // ensure newline separation
+      MinimalPrettyPrinter pp = new MinimalPrettyPrinter();
+      pp.setRootValueSeparator(System.getProperty("line.separator"));
+      generator.setPrettyPrinter(pp);
+    }
+
+    this.reader = new ColumnFileReader(input);
+
+    int columnCount = (int)reader.getColumnCount();
+    this.values = new ColumnValues[columnCount];
+    this.shortNames = new String[columnCount];
+    for (int i = 0; i < columnCount; i++) {
+      values[i] = reader.getValues(i);
+      shortNames[i] = shortName(reader.getColumnMetaData(i));
+    }
+
+    List<ColumnMetaData> roots = reader.getRoots();
+    for (long row = 0; row < reader.getRowCount(); row++) {
+      for (ColumnValues v : values)
+        v.startRow();
+      generator.writeStartObject();
+      for (ColumnMetaData root : roots)
+        valueToJson(root);
+      generator.writeEndObject();
+    }
+    generator.flush();
+    out.println();
+    reader.close();
+  }
+  
+  private void valueToJson(ColumnMetaData column) throws IOException {
+    generator.writeFieldName(shortNames[column.getNumber()]);
+    ColumnValues in = values[column.getNumber()];
+    if (!column.isArray()) {
+      primitiveToJson(column, in.nextValue());
+    } else {
+      generator.writeStartArray();
+      int length = in.nextLength();
+      for (int i = 0; i < length; i++) {
+        Object value = in.nextValue();
+        List<ColumnMetaData> children = column.getChildren();
+        if (children.size() == 0) {
+          primitiveToJson(column, value);
+        } else {
+          generator.writeStartObject();
+          if (value != null) {
+            generator.writeFieldName("value$");
+            primitiveToJson(column, value);
+          }
+          for (ColumnMetaData child : children)
+            valueToJson(child);
+          generator.writeEndObject();
+        }
+      }
+      generator.writeEndArray();
+    }
+  }
+
+  private void primitiveToJson(ColumnMetaData column, Object value) 
+    throws IOException {
+    switch (column.getType()) {
+    case NULL:
+      generator.writeNull();                        break;
+    case INT:
+      generator.writeNumber((Integer)value);        break;
+    case LONG:
+      generator.writeNumber((Long)value);           break;
+    case FIXED32:
+      generator.writeNumber((Integer)value);        break;
+    case FIXED64:
+      generator.writeNumber((Long)value);           break;
+    case FLOAT:
+      generator.writeNumber((Float)value);          break;
+    case DOUBLE:
+      generator.writeNumber((Double)value);         break;
+    case STRING:
+      generator.writeString((String)value);         break;
+    case BYTES:
+      generator.writeBinary((byte[])value);
+      break;
+    default:
+      throw new RuntimeException("Unknown value type: "+column.getType());
+    }
+  }
+
+  // trim off portion of name shared with parent
+  private String shortName(ColumnMetaData column) {
+    String name = column.getName();
+    ColumnMetaData parent = column.getParent();
+    if (parent != null && name.startsWith(parent.getName()))
+      name = name.substring(parent.getName().length());
+    if (!Character.isLetterOrDigit(name.charAt(0)))
+      name = name.substring(1);
+    return name;
+  }
+
+}

Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.InputStream;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.URI;
+
+import org.apache.trevni.Input;
+import org.apache.trevni.avro.HadoopInput;
+import org.apache.trevni.InputFile;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/** Static helpers the Trevni tools use to open their inputs and outputs. */
+class TrevniUtil {
+
+  /**
+   * Opens the named file as a Trevni {@link Input}.  Names that start with
+   * "hdfs://" are opened through Hadoop; every other name is treated as a
+   * local file.
+   */
+  static Input input(String filename) throws IOException {
+    if (!filename.startsWith("hdfs://")) {
+      return new InputFile(new File(filename));
+    }
+    return new HadoopInput(new Path(filename), new Configuration());
+  }
+
+  /**
+   * Returns a buffered stream over stdin if filename is "-", over the HDFS
+   * file if filename starts with "hdfs://", and over the local file
+   * otherwise.
+   * @throws IOException if the file cannot be opened
+   */
+  static InputStream input(String filename, InputStream stdin)
+    throws IOException {
+    final InputStream raw;
+    if (filename.equals("-")) {
+      raw = stdin;
+    } else if (filename.startsWith("hdfs://")) {
+      raw = FileSystem.get(URI.create(filename), new Configuration())
+        .open(new Path(filename));
+    } else {
+      raw = new FileInputStream(new File(filename));
+    }
+    return new BufferedInputStream(raw);
+  }
+
+  /**
+   * Returns a buffered stream over stdout if filename is "-", over a newly
+   * created HDFS file if filename starts with "hdfs://", and over the local
+   * file otherwise.
+   * @throws IOException if the file cannot be opened for writing
+   */
+  static OutputStream output(String filename, OutputStream stdout)
+    throws IOException {
+    final OutputStream raw;
+    if (filename.equals("-")) {
+      raw = stdout;
+    } else if (filename.startsWith("hdfs://")) {
+      raw = FileSystem.get(URI.create(filename), new Configuration())
+        .create(new Path(filename));
+    } else {
+      raw = new FileOutputStream(new File(filename));
+    }
+    return new BufferedOutputStream(raw);
+  }
+
+}

Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: avro/trunk/lang/java/trevni/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Sep 11 21:35:56 2012
@@ -0,0 +1 @@
+target

Propchange: avro/trunk/lang/java/trevni/avro/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Sep 11 21:35:56 2012
@@ -0,0 +1 @@
+target

Added: avro/trunk/lang/java/trevni/avro/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/pom.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/pom.xml (added)
+++ avro/trunk/lang/java/trevni/avro/pom.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+  xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <artifactId>trevni-java</artifactId>
+    <groupId>org.apache.trevni</groupId>
+    <version>1.7.2-SNAPSHOT</version>
+    <relativePath>../</relativePath>
+  </parent>
+
+  <artifactId>trevni-avro</artifactId>
+  <name>Trevni Java Avro</name>
+  <url>http://avro.apache.org/</url>
+  <description>Trevni Java Avro</description>
+
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>trevni-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>trevni-core</artifactId>
+      <classifier>tests</classifier>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro-mapred</artifactId>
+      <version>${project.version}</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>${hadoop.version}</version>
+      <scope>compile</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+  </profiles>
+
+</project>
+

Propchange: avro/trunk/lang/java/trevni/avro/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+import java.io.Closeable;
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.trevni.ColumnMetaData;
+import org.apache.trevni.ColumnFileReader;
+import org.apache.trevni.ColumnValues;
+import org.apache.trevni.Input;
+import org.apache.trevni.InputFile;
+import org.apache.trevni.TrevniRuntimeException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Field;
+import org.apache.avro.generic.GenericData;
+
+import static org.apache.trevni.avro.AvroColumnator.isSimple;
+
+/** Read files written with {@link AvroColumnWriter}.  A subset of the schema
+ * used for writing may be specified when reading.  In this case only columns
+ * of the subset schema are read. */
+public class AvroColumnReader<D>
+  implements Iterator<D>, Iterable<D>, Closeable {
+
+  private ColumnFileReader reader;
+  private GenericData model;
+  private Schema fileSchema;
+  private Schema readSchema;
+  
+  private ColumnValues[] values;
+  private int[] arrayWidths;
+  private int column;                          // current index in values
+
+  /** Parameters for reading an Avro column file. */
+  public static class Params {
+    Input input;
+    Schema schema;
+    GenericData model = GenericData.get();
+
+    /** Construct reading from a file. */
+    public Params(File file) throws IOException {
+      this(new InputFile(file));
+    }
+
+    /** Construct reading from input. */
+    public Params(Input input) { this.input = input; }
+
+    /** Set subset schema to project data down to. */
+    public Params setSchema(Schema schema) {
+      this.schema = schema;
+      return this;
+    }
+
+    /** Set data representation. */
+    public Params setModel(GenericData model) {
+      this.model = model;
+      return this;
+    }
+  }
+
+  /** Construct a reader for a file. */
+  public AvroColumnReader(Params params)
+    throws IOException {
+    this.reader = new ColumnFileReader(params.input);
+    this.model = params.model;
+    this.fileSchema =
+      Schema.parse(reader.getMetaData().getString(AvroColumnWriter.SCHEMA_KEY));
+    this.readSchema = params.schema == null ? fileSchema : params.schema;
+    initialize();
+  }
+
+  /** Return the schema for data in this file. */
+  public Schema getFileSchema() { return fileSchema; }
+
+  void initialize() throws IOException {
+    // compute a mapping from column name to number for file
+    Map<String,Integer> fileColumnNumbers = new HashMap<String,Integer>();
+    int i = 0;
+    for (ColumnMetaData c : new AvroColumnator(fileSchema).getColumns())
+      fileColumnNumbers.put(c.getName(), i++);
+
+    // create iterator for each column in readSchema
+    AvroColumnator readColumnator = new AvroColumnator(readSchema);
+    this.arrayWidths = readColumnator.getArrayWidths();
+    ColumnMetaData[] readColumns = readColumnator.getColumns();
+    this.values = new ColumnValues[readColumns.length];
+    int j = 0;
+    for (ColumnMetaData c : readColumns) {
+      Integer n = fileColumnNumbers.get(c.getName());
+      if (n == null)
+        throw new TrevniRuntimeException("No column named: "+c.getName());
+      values[j++] = reader.getValues(n);
+    }
+  }
+
+  @Override
+  public Iterator<D> iterator() { return this; }
+
+  @Override
+  public boolean hasNext() {
+    return values[0].hasNext();
+  }
+
+  /** Return the number of rows in this file. */
+  public long getRowCount() { return reader.getRowCount(); }
+
+  @Override
+  public D next() {
+    try {
+      for (int i = 0; i < values.length; i++)
+        values[i].startRow();
+      this.column = 0;
+      return (D)read(readSchema);
+    } catch (IOException e) {
+      throw new TrevniRuntimeException(e);
+    }
+  }
+
+  private Object read(Schema s) throws IOException {
+    if (isSimple(s))
+      return nextValue(s, column++);
+
+    final int startColumn = column;
+
+    switch (s.getType()) {
+    case MAP: 
+      int size = values[column].nextLength();
+      Map map = (Map)new HashMap(size);
+      for (int i = 0; i < size; i++) {
+        this.column = startColumn;
+        values[column++].nextValue();                      // null in parent
+        String key = (String)values[column++].nextValue(); // key
+        map.put(key, read(s.getValueType()));              // value
+      }
+      column = startColumn + arrayWidths[startColumn];
+      return map;
+    case RECORD: 
+      Object record = model.newRecord(null, s);
+      for (Field f : s.getFields())
+        model.setField(record, f.name(), f.pos(), read(f.schema()));
+      return record;
+    case ARRAY: 
+      int length = values[column].nextLength();
+      List elements = (List)new GenericData.Array(length, s);
+      for (int i = 0; i < length; i++) {
+        this.column = startColumn;
+        Object value = nextValue(s, column++);
+        if (!isSimple(s.getElementType()))
+          value = read(s.getElementType());
+        elements.add(value);
+      }
+      column = startColumn + arrayWidths[startColumn];
+      return elements;
+    case UNION:
+      Object value = null;
+      for (Schema branch : s.getTypes()) {
+        if (branch.getType() == Schema.Type.NULL) continue;
+        if (values[column].nextLength() == 1) {
+          value = nextValue(s, column);
+          column++;
+          if (!isSimple(branch))
+            value = read(branch);
+        } else {
+          column += arrayWidths[column];
+        }
+      }
+      return value;
+    default:
+      throw new TrevniRuntimeException("Unknown schema: "+s);
+    }
+  }
+
+  private Object nextValue(Schema s, int column) throws IOException {
+    Object v = values[column].nextValue();
+    
+    switch (s.getType()) {
+    case ENUM:
+      return new GenericData.EnumSymbol(s, s.getEnumSymbols().get((Integer)v));
+    case FIXED:
+      return new GenericData.Fixed(s, ((ByteBuffer)v).array());
+    }
+
+    return v;
+  }
+
+  @Override
+  public void remove() { throw new UnsupportedOperationException(); }
+
+  @Override
+  public void close() throws IOException {
+    reader.close();
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+import java.io.File;
+import java.io.OutputStream;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.trevni.ColumnFileMetaData;
+import org.apache.trevni.ColumnFileWriter;
+import org.apache.trevni.TrevniRuntimeException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Field;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericFixed;
+import org.apache.avro.util.Utf8;
+
+import static org.apache.trevni.avro.AvroColumnator.isSimple;
+
+/** Write Avro records to a Trevni column file.
+ *
+ * <p>Each primitive type is written to a separate column.
+ *
+ * <p>Output is buffered until {@link #writeTo(OutputStream)} is called.  The
+ * {@link #sizeEstimate()} indicates both the amount of data buffered and the
+ * size of the file that will be written.
+ */
+public class AvroColumnWriter<D> {
+  private Schema schema;
+  private GenericData model;
+  private ColumnFileWriter writer;
+  private int[] arrayWidths;
+
+  public static final String SCHEMA_KEY = "avro.schema";
+
+  public AvroColumnWriter(Schema s, ColumnFileMetaData meta)
+    throws IOException {
+    this(s, meta, GenericData.get());
+  }
+
+  public AvroColumnWriter(Schema s, ColumnFileMetaData meta, GenericData model)
+    throws IOException {
+    this.schema = s;
+    AvroColumnator columnator = new AvroColumnator(s);
+    meta.set(SCHEMA_KEY, s.toString());           // save schema in file
+    this.writer = new ColumnFileWriter(meta, columnator.getColumns());
+    this.arrayWidths = columnator.getArrayWidths();
+    this.model = model;
+  }
+
+  /** Return the approximate size of the file that will be written.  Tries to
+   * slightly over-estimate.  Indicates both the size in memory of the buffered
+   * data as well as the size of the file that will be written by {@link
+   * #writeTo(OutputStream)}. */
+  public long sizeEstimate() { return writer.sizeEstimate(); }
+
+  /** Write all rows added to the named output stream. */
+  public void writeTo(OutputStream out) throws IOException {
+    writer.writeTo(out);
+  }
+
+  /** Write all rows added to the named file. */
+  public void writeTo(File file) throws IOException {
+    writer.writeTo(file);
+  }
+
+  /** Add a row to the file. */
+  public void write(D value) throws IOException {
+    writer.startRow();
+    int count = write(value, schema, 0);
+    assert(count == writer.getColumnCount());
+    writer.endRow();
+  }
+  
+  private int write(Object o, Schema s, int column) throws IOException {
+    if (isSimple(s)) {
+      writeValue(o, s, column);
+      return column+1;
+    }
+    switch (s.getType()) {
+    case MAP: 
+      Map<?,?> map = (Map)o;
+      writer.writeLength(map.size(), column);
+      for (Map.Entry e : map.entrySet()) {
+        writer.writeValue(null, column);
+        writer.writeValue(e.getKey(), column+1);
+        int c = write(e.getValue(), s.getValueType(), column+2);
+        assert(c == column+arrayWidths[column]);
+      }
+      return column+arrayWidths[column];
+    case RECORD: 
+      for (Field f : s.getFields())
+        column = write(model.getField(o,f.name(),f.pos()), f.schema(), column);
+      return column;
+    case ARRAY: 
+      Collection elements = (Collection)o;
+      writer.writeLength(elements.size(), column);
+      if (isSimple(s.getElementType())) {         // optimize simple arrays
+        for (Object element : elements)
+          writeValue(element, s.getElementType(), column);
+        return column+1;
+      }
+      for (Object element : elements) {
+        writer.writeValue(null, column);
+        int c = write(element, s.getElementType(), column+1);
+        assert(c == column+arrayWidths[column]);
+      }
+      return column+arrayWidths[column];
+    case UNION:
+      int b = model.resolveUnion(s, o);
+      int i = 0;
+      for (Schema branch : s.getTypes()) {
+        boolean selected = i++ == b;
+        if (branch.getType() == Schema.Type.NULL) continue;
+        if (!selected) {
+          writer.writeLength(0, column);
+          column+=arrayWidths[column];
+        } else {
+          writer.writeLength(1, column);
+          if (isSimple(branch)) {
+            writeValue(o, branch, column++);
+          } else {
+            writer.writeValue(null, column);
+            column = write(o, branch, column+1);
+          }
+        }
+      }
+      return column;
+    default:
+      throw new TrevniRuntimeException("Unknown schema: "+s);
+    }
+  }
+
+  private void writeValue(Object value, Schema s, int column)
+    throws IOException {
+    
+    switch (s.getType()) {
+    case STRING:
+      if (value instanceof Utf8)                    // convert Utf8 to String
+        value = value.toString();
+      break;
+    case ENUM:
+      if (value instanceof Enum)
+        value = ((Enum)value).ordinal();
+      else 
+        value = s.getEnumOrdinal(value.toString());
+      break;
+    case FIXED:
+      value = ((GenericFixed)value).bytes();
+      break;
+    }
+    writer.writeValue(value, column);
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni.avro;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.IdentityHashMap;
+
+import org.apache.trevni.ColumnMetaData;
+import org.apache.trevni.ValueType;
+import org.apache.trevni.TrevniRuntimeException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Field;
+
+/** Utility that computes the column layout of a schema. */
+class AvroColumnator {
+
+  private Schema schema;
+
+  private List<ColumnMetaData> columns = new ArrayList<ColumnMetaData>();
+  private List<Integer> arrayWidths = new ArrayList<Integer>();
+
+  /** Compute the column layout for the schema.
+   * @throws TrevniRuntimeException if the schema is recursive or contains a
+   * schema type that cannot be mapped to columns */
+  public AvroColumnator(Schema schema) {
+    this.schema = schema;
+    columnize(null, schema, null, false);
+  }
+
+  /** Return columns for the schema. */
+  public ColumnMetaData[] getColumns() {
+    return columns.toArray(new ColumnMetaData[columns.size()]);
+  }
+
+  /** Return array giving, for each column, the number of columns it spans:
+   * itself plus the immediately following columns that are descendants of
+   * it.  Columns without descendants have width one. */
+  public int[] getArrayWidths() {
+    int[] result = new int[arrayWidths.size()];
+    int i = 0;
+    for (Integer width : arrayWidths)
+      result[i++] = width;
+    return result;
+  }
+
+  // complex schemas on the path currently being columnized, by identity
+  private Map<Schema,Schema> seen = new IdentityHashMap<Schema,Schema>();
+
+  /** Append the columns for schema <code>s</code>.
+   * @param path column name prefix built so far, or null at the root
+   * @param s schema to lay out
+   * @param parent column that directly contains this one, or null
+   * @param isArray whether columns added directly for <code>s</code> are
+   * flagged as arrays */
+  private void columnize(String path, Schema s,
+                         ColumnMetaData parent, boolean isArray) {
+
+    if (isSimple(s)) {
+      if (path == null) path = s.getFullName();
+      addColumn(path, simpleValueType(s), parent, isArray);
+      return;
+    }
+
+    if (seen.containsKey(s))                      // catch recursion
+      throw new TrevniRuntimeException("Cannot shred recursive schemas: "+s);
+    seen.put(s, s);
+
+    switch (s.getType()) {
+    case MAP:
+      path = path == null ? ">" : path+">";
+      int start = columns.size();
+      ColumnMetaData p = addColumn(path, ValueType.NULL, parent, true);
+      addColumn(p(path,"key", ""), ValueType.STRING, p, false);
+      columnize(p(path,"value", ""), s.getValueType(), p, false);
+      arrayWidths.set(start, columns.size()-start); // fixup with actual width
+      break;
+    case RECORD:
+      for (Field field : s.getFields())           // flatten fields to columns
+        columnize(p(path, field.name(), "#"), field.schema(), parent, isArray);
+      break;
+    case ARRAY:
+      path = path == null ? "[]" : path+"[]";
+      addArrayColumn(path, s.getElementType(), parent);
+      break;
+    case UNION:
+      for (Schema branch : s.getTypes())          // array per non-null branch
+        if (branch.getType() != Schema.Type.NULL)
+          addArrayColumn(p(path, branch, "/"), branch, parent);
+      break;
+    default:
+      throw new TrevniRuntimeException("Unknown schema: "+s);
+    }
+
+    // Only schemas still being columnized indicate recursion.  Without this
+    // a schema object that is merely used in two places would be rejected.
+    seen.remove(s);
+  }
+
+  /** Extend a path with the full name of a child schema.  Union children
+   * add nothing to the path. */
+  private String p(String parent, Schema child, String sep) {
+    if (child.getType() == Schema.Type.UNION)
+      return parent;
+    return p(parent, child.getFullName(), sep);
+  }
+
+  /** Join parent and child with a separator; a null parent yields just the
+   * child. */
+  private String p(String parent, String child, String sep) {
+    return parent == null ? child : parent + sep + child;
+  }
+
+  /** Append a column, recording a width of one that callers fix up later
+   * when the column turns out to have descendants. */
+  private ColumnMetaData addColumn(String path, ValueType type,
+                                   ColumnMetaData parent, boolean isArray) {
+    ColumnMetaData column = new ColumnMetaData(path, type);
+    if (parent != null)
+      column.setParent(parent);
+    column.isArray(isArray);
+    columns.add(column);
+    arrayWidths.add(1);                           // placeholder
+    return column;
+  }
+
+  /** Append the column or columns for a repeated element: a single array
+   * column when the element is simple, else a parent array column of nulls
+   * followed by the element's own columns. */
+  private void addArrayColumn(String path, Schema element,
+                              ColumnMetaData parent) {
+    if (path == null) path = element.getFullName();
+    if (isSimple(element)) {                      // optimize simple arrays
+      addColumn(path, simpleValueType(element), parent, true);
+      return;
+    }
+    // complex array: insert a parent column with lengths
+    int start = columns.size();
+    ColumnMetaData array = addColumn(path, ValueType.NULL, parent, true);
+    columnize(path, element, array, false);
+    arrayWidths.set(start, columns.size()-start); // fixup with actual width
+  }
+
+  /** True for schemas that are stored in a single column: null, int, long,
+   * float, double, bytes, string, enum and fixed. */
+  static boolean isSimple(Schema s) {
+    switch (s.getType()) {
+    case NULL:
+    case INT: case LONG:
+    case FLOAT: case DOUBLE:
+    case BYTES: case STRING:
+    case ENUM: case FIXED:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  /** Return the column value type used to store a simple schema.  Enums are
+   * stored as ints and fixed values as bytes. */
+  private ValueType simpleValueType(Schema s) {
+    switch (s.getType()) {
+    case NULL:   return ValueType.NULL;
+    case INT:    return ValueType.INT;
+    case LONG:   return ValueType.LONG;
+    case FLOAT:  return ValueType.FLOAT;
+    case DOUBLE: return ValueType.DOUBLE;
+    case BYTES:  return ValueType.BYTES;
+    case STRING: return ValueType.STRING;
+    case ENUM:   return ValueType.INT;
+    case FIXED:  return ValueType.BYTES;
+    default:
+      throw new TrevniRuntimeException("Unknown schema: "+s);
+    }
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RecordReader;
+
+import org.apache.avro.reflect.ReflectData;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroWrapper;
+
+/** An {@link org.apache.hadoop.mapred.InputFormat} for Trevni files.
+ *
+ * <p>A subset schema to be read may be specified with {@link
+ * AvroJob#setInputSchema(Schema)}.
+ */
+public class AvroTrevniInputFormat<T>
+  extends FileInputFormat<AvroWrapper<T>, NullWritable> {
+
+  /** Files are never split: each file is read whole by one reader. */
+  @Override
+  protected boolean isSplitable(FileSystem fs, Path filename) {
+    return false;
+  }
+
+  /** List input files recursively, keeping only those whose name ends with
+   * {@link AvroTrevniOutputFormat#EXT}.  Note that this sets
+   * "mapred.input.dir.recursive" on the passed job. */
+  @Override
+  protected FileStatus[] listStatus(JobConf job) throws IOException {
+    List<FileStatus> result = new ArrayList<FileStatus>();
+    job.setBoolean("mapred.input.dir.recursive", true);
+    for (FileStatus file : super.listStatus(job))
+      if (file.getPath().getName().endsWith(AvroTrevniOutputFormat.EXT))
+        result.add(file);
+    return result.toArray(new FileStatus[0]);
+  }
+
+  /** Return a reader over the rows of the split's file.  Data is read with
+   * {@link ReflectData}, projected to the job's input schema when one is
+   * set. */
+  @Override
+  public RecordReader<AvroWrapper<T>, NullWritable>
+    getRecordReader(InputSplit split, final JobConf job,
+                    Reporter reporter) throws IOException {
+    final FileSplit file = (FileSplit)split;
+    reporter.setStatus(file.toString());
+
+    final AvroColumnReader.Params params =
+      new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
+    params.setModel(ReflectData.get());
+    if (job.get(AvroJob.INPUT_SCHEMA) != null)
+      params.setSchema(AvroJob.getInputSchema(job));
+
+    return new RecordReader<AvroWrapper<T>, NullWritable>() {
+      private AvroColumnReader<T> reader = new AvroColumnReader<T>(params);
+      private float rows = reader.getRowCount();  // float for progress ratio
+      private long row;                           // rows returned so far
+
+      public AvroWrapper<T> createKey() { return new AvroWrapper<T>(null); }
+
+      public NullWritable createValue() { return NullWritable.get(); }
+
+      public boolean next(AvroWrapper<T> wrapper, NullWritable ignore)
+        throws IOException {
+        if (!reader.hasNext())
+          return false;
+        wrapper.datum(reader.next());
+        row++;
+        return true;
+      }
+
+      /** Fraction of rows read so far; zero for a file with no rows, where
+       * the ratio would otherwise be 0/0 = NaN. */
+      public float getProgress() throws IOException {
+        return rows == 0 ? 0.0f : row / rows;
+      }
+
+      /** The position is reported as a row count, not a byte offset. */
+      public long getPos() throws IOException { return row; }
+
+      public void close() throws IOException { reader.close(); }
+
+    };
+
+  }
+
+}
+

Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Map;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.util.Progressable;
+
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroWrapper;
+
+import org.apache.trevni.MetaData;
+import org.apache.trevni.ColumnFileMetaData;
+
+/** An {@link org.apache.hadoop.mapred.OutputFormat} that writes Avro data to
+ * Trevni files.
+ *
+ * <p>Writes a directory of files per task, each comprising a single filesystem
+ * block.  To reduce the number of files, increase the default filesystem block
+ * size for the job.  Each task also requires enough memory to buffer a
+ * filesystem block.
+ */
+public class AvroTrevniOutputFormat <T>
+  extends FileOutputFormat<AvroWrapper<T>, NullWritable> {
+
+  /** The file name extension for trevni files. */
+  public final static String EXT = ".trv";
+  
+  /** Prefix of the job configuration keys that carry output file metadata. */
+  public static final String META_PREFIX = "trevni.meta.";
+
+  /** Add metadata to job output files.  The value is stored in the job
+   * configuration under {@link #META_PREFIX} followed by <code>key</code>. */
+  public static void setMeta(JobConf job, String key, String value) {
+    job.set(META_PREFIX+key, value);
+  }
+
+  /** Create a writer that buffers records in memory and writes them as
+   * numbered <code>part-N.trv</code> files under the task's output path.
+   *
+   * <p>A map-only job (zero reduce tasks) is written with the map output
+   * schema; any other job with the job's output schema.
+   * @throws IOException if the task's output directory cannot be created
+   */
+  @Override
+  public RecordWriter<AvroWrapper<T>, NullWritable>
+    getRecordWriter(FileSystem ignore, final JobConf job,
+                    final String name, Progressable prog)
+    throws IOException {
+
+    boolean isMapOnly = job.getNumReduceTasks() == 0;
+    final Schema schema = isMapOnly
+      ? AvroJob.getMapOutputSchema(job)
+      : AvroJob.getOutputSchema(job);
+
+    // copy every "trevni.meta.*" job property into the file metadata, keyed
+    // by the property name with exactly that prefix removed
+    final ColumnFileMetaData meta = new ColumnFileMetaData();
+    for (Map.Entry<String,String> e : job)
+      if (e.getKey().startsWith(META_PREFIX))
+        meta.put(e.getKey().substring(META_PREFIX.length()),
+                 e.getValue().getBytes(MetaData.UTF8));
+
+    final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
+    final FileSystem fs = dir.getFileSystem(job);
+    if (!fs.mkdirs(dir))
+      throw new IOException("Failed to create directory: " + dir);
+    final long blockSize = fs.getDefaultBlockSize();
+
+    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
+      private int part = 0;                       // index of the next part file
+
+      private AvroColumnWriter<T> writer =
+        new AvroColumnWriter<T>(schema, meta, ReflectData.get());
+    
+      /** Write the buffered records to the next part file, then start over
+       * with an empty writer. */
+      private void flush() throws IOException {
+        OutputStream out = fs.create(new Path(dir, "part-"+(part++)+EXT));
+        try {
+          writer.writeTo(out);
+        } finally {
+          out.close();
+        }
+        writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
+      }
+
+      /** Buffer a record, flushing once the size estimate reaches the
+       * filesystem's default block size. */
+      public void write(AvroWrapper<T> wrapper, NullWritable ignore)
+        throws IOException {
+        writer.write(wrapper.datum());
+        if (writer.sizeEstimate() >= blockSize)              // block full
+          flush();
+      }
+
+      /** Write whatever is still buffered as a final part file. */
+      public void close(Reporter reporter) throws IOException {
+        flush();
+      }
+    };
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import org.apache.trevni.Input;
+
+/** Adapt a Hadoop {@link FSDataInputStream} to Trevni's {@link Input}. */
+public class HadoopInput implements Input {
+  private final FSDataInputStream stream;
+  private final long len;
+
+  /** Construct given a path and a configuration.
+   *
+   * <p>The file's length is looked up before the stream is opened, so that a
+   * failure to read the file status cannot leave an open stream behind.
+   * @param path the file to read
+   * @param conf the configuration used to resolve the path's file system
+   * @throws IOException if the file status cannot be read or the file
+   * cannot be opened
+   */
+  public HadoopInput(Path path, Configuration conf) throws IOException {
+    this.len = path.getFileSystem(conf).getFileStatus(path).getLen();
+    this.stream = path.getFileSystem(conf).open(path);
+  }
+
+  /** Return the file length recorded when this input was constructed. */
+  @Override public long length() {
+    return len;
+  }
+
+  /** Delegate to the stream's positioned read, passing the position
+   * <code>p</code>, buffer <code>b</code>, buffer offset <code>s</code> and
+   * length <code>l</code> through unchanged. */
+  @Override public int read(long p, byte[] b, int s, int l) throws IOException {
+    return stream.read(p, b, s, l);
+  }
+
+  /** Close the underlying stream. */
+  @Override public void close() throws IOException {
+    stream.close();
+  }
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html Tue Sep 11 21:35:56 2012
@@ -0,0 +1,38 @@
+<html>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<body>Read and write <a href="http://avro.apache.org/">Avro</a> data
+in Trevni column files.
+
+<h2>Limitations</h2>
+
+The current implementation does not correctly handle all Avro data.
+In particular:
+
+<ul>
+  <li>Recursive types are not supported.</li>
+  <li>With ReflectData, fields of Java type <b>byte</b>, <b>short</b>
+    and <b>char</b> are not supported.  Instead use int. </li>
+  <li>With ReflectData, Java arrays are not supported.  Instead use
+  List. </li>
+  <li>An <b>enum</b> is always read as a GenericData.EnumSymbol, even
+    when SpecificData or ReflectData are used. </li>
+</ul>
+</body>
+</html>

Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc Tue Sep 11 21:35:56 2012
@@ -0,0 +1,78 @@
+{
+    "type": "record",
+    "name": "Document",
+    "fields": [
+        {
+            "name": "DocId",
+            "type": "long"
+        },
+        {
+            "name": "Links",
+            "type": [
+                "null",
+                {
+                    "name": "Links",
+                    "type": "record",
+                    "fields": [
+                        {
+                            "name": "Backward",
+                            "type": {
+                                "type": "array",
+                                "items": "long"
+                            }
+                        },
+                        {
+                            "name": "Forward",
+                            "type": {
+                                "type": "array",
+                                "items": "long"
+                            }
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "name": "Name",
+            "type": {
+                "type": "array",
+                "items": {
+                    "name": "Name",
+                    "type": "record",
+                    "fields": [
+                        {
+                            "name": "Language",
+                            "type": {
+                                "type": "array",
+                                "items": {
+                                    "name": "Language",
+                                    "type": "record",
+                                    "fields": [
+                                        {
+                                            "name": "Code",
+                                            "type": "string"
+                                        },
+                                        {
+                                            "name": "Country",
+                                            "type": [
+                                                "null",
+                                                "string"
+                                            ]
+                                        }
+                                    ]
+                                }
+                            }
+                        },
+                        {
+                            "name": "Url",
+                            "type": [
+                                "null",
+                                "string"
+                            ]
+                        }
+                    ]
+                }
+            }
+        }
+    ]
+}

Added: avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json Tue Sep 11 21:35:56 2012
@@ -0,0 +1,73 @@
+{
+    "DocId": 10,
+    "Links": {
+        "Links": {
+            "Backward": [],
+            "Forward": [
+                20,
+                40,
+                60
+            ]
+        }
+    },
+    "Name": [
+        {
+            "Language": [
+                {
+                    "Code": "en-us",
+                    "Country": {
+                        "string": "us"
+                    }
+                },
+                {
+                    "Code": "en",
+                    "Country": null
+                }
+            ],
+            "Url": {
+                "string": "http://A"
+            }
+        },
+        {
+            "Language": [],
+            "Url": {
+                "string": "http://B"
+            }
+        },
+        {
+            "Language": [
+                {
+                    "Code": "en-gb",
+                    "Country": {
+                        "string": "gb"
+                    }
+                }
+            ],
+            "Url": null
+        }
+    ]
+}
+{
+    "DocId": 20,
+    "Links": {
+        "Links": {
+            "Backward": [
+                10,
+                30
+            ],
+            "Forward": [
+                80
+            ]
+        }
+    },
+    "Name": [
+        {
+            "Language": [
+                
+            ],
+            "Url": {
+                "string": "http://C"
+            }
+        }
+    ]
+}

Added: avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc Tue Sep 11 21:35:56 2012
@@ -0,0 +1,41 @@
+{
+    "type": "record",
+    "name": "Document",
+    "fields": [
+        {
+            "name": "DocId",
+            "type": "long"
+        },
+        {
+            "name": "Name",
+            "type": {
+                "type": "array",
+                "items": {
+                    "name": "Name",
+                    "type": "record",
+                    "fields": [
+                        {
+                            "name": "Language",
+                            "type": {
+                                "type": "array",
+                                "items": {
+                                    "name": "Language",
+                                    "type": "record",
+                                    "fields": [
+                                        {
+                                            "name": "Country",
+                                            "type": [
+                                                "null",
+                                                "string"
+                                            ]
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    ]
+                }
+            }
+        }
+    ]
+}

Added: avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json Tue Sep 11 21:35:56 2012
@@ -0,0 +1,37 @@
+{
+    "DocId": 10,
+    "Name": [
+        {
+            "Language": [
+                {
+                    "Country": {
+                        "string": "us"
+                    }
+                },
+                {
+                    "Country": null
+                }
+            ]
+        },
+        {
+            "Language": []
+        },
+        {
+            "Language": [
+                {
+                    "Country": {
+                        "string": "gb"
+                    }
+                }
+            ]
+        }
+    ]
+}
+{
+    "DocId": 20,
+    "Name": [
+        {
+            "Language": []
+        }
+    ]
+}

Added: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni.avro;
+
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericArray;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+
+import org.apache.trevni.TestUtil;
+
+/** Produces a fixed number of randomly populated Java objects that conform
+ * to a given schema. */
+public class RandomData implements Iterable<Object> {
+  private final Schema root;
+  private final int count;
+
+  /** Create a source of <code>count</code> random instances of
+   * <code>schema</code>. */
+  public RandomData(Schema schema, int count) {
+    this.root = schema;
+    this.count = count;
+  }
+
+  /** Return an iterator that generates one new datum per call to
+   * <code>next()</code>; <code>remove()</code> is unsupported. */
+  public Iterator<Object> iterator() {
+    return new Iterator<Object>() {
+      private int produced;
+      private Random rng = TestUtil.createRandom();
+
+      public boolean hasNext() {
+        return produced < count;
+      }
+
+      public Object next() {
+        produced++;
+        return generate(root, rng, 0);
+      }
+
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
+    };
+  }
+
+  /** Build one random value for <code>schema</code>.  The nesting depth
+   * <code>d</code> is subtracted from array and map sizes, so containers
+   * shrink as they nest deeper. */
+  @SuppressWarnings(value="unchecked")
+  private static Object generate(Schema schema, Random random, int d) {
+    switch (schema.getType()) {
+    case RECORD: {
+      GenericRecord result = new GenericData.Record(schema);
+      for (Schema.Field f : schema.getFields()) {
+        result.put(f.name(), generate(f.schema(), random, d+1));
+      }
+      return result;
+    }
+    case ENUM: {
+      List<String> symbols = schema.getEnumSymbols();
+      String chosen = symbols.get(random.nextInt(symbols.size()));
+      return new GenericData.EnumSymbol(schema, chosen);
+    }
+    case ARRAY: {
+      int size = (random.nextInt(5)+2)-d;
+      // a non-positive size yields an empty array
+      GenericArray<Object> items =
+        new GenericData.Array(Math.max(size, 0), schema);
+      for (int i = 0; i < size; i++) {
+        items.add(generate(schema.getElementType(), random, d+1));
+      }
+      return items;
+    }
+    case MAP: {
+      int size = (random.nextInt(5)+2)-d;
+      Map<Object,Object> entries =
+        new HashMap<Object,Object>(Math.max(size, 0));
+      for (int i = 0; i < size; i++) {
+        // draw the key before the value so the random sequence is stable
+        Object key = TestUtil.randomString(random);
+        Object value = generate(schema.getValueType(), random, d+1);
+        entries.put(key, value);
+      }
+      return entries;
+    }
+    case UNION: {
+      List<Schema> branches = schema.getTypes();
+      Schema branch = branches.get(random.nextInt(branches.size()));
+      return generate(branch, random, d);
+    }
+    case FIXED: {
+      byte[] content = new byte[schema.getFixedSize()];
+      random.nextBytes(content);
+      return new GenericData.Fixed(schema, content);
+    }
+    case STRING:
+      return TestUtil.randomString(random);
+    case BYTES:
+      return TestUtil.randomBytes(random);
+    case INT:
+      return random.nextInt();
+    case LONG:
+      return random.nextLong();
+    case FLOAT:
+      return random.nextFloat();
+    case DOUBLE:
+      return random.nextDouble();
+    case BOOLEAN:
+      return random.nextBoolean();
+    case NULL:
+      return null;
+    default:
+      throw new RuntimeException("Unknown type: "+schema);
+    }
+  }
+
+  /** Write <code>count</code> random records of the schema in
+   * <code>schemafile</code> to the Avro data file <code>outputfile</code>. */
+  public static void main(String[] args) throws Exception {
+    if (args.length != 3) {
+      System.out.println("Usage: RandomData <schemafile> <outputfile> <count>");
+      System.exit(-1);
+    }
+    Schema sch = Schema.parse(new File(args[0]));
+    DataFileWriter<Object> out =
+      new DataFileWriter<Object>(new GenericDatumWriter<Object>())
+      .create(sch, new File(args[1]));
+    try {
+      Iterable<Object> data = new RandomData(sch, Integer.parseInt(args[2]));
+      for (Object datum : data) {
+        out.append(datum);
+      }
+    } finally {
+      out.close();
+    }
+  }
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni.avro;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.EOFException;
+import java.io.InputStream;
+import java.io.FileInputStream;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.trevni.ValueType;
+import org.apache.trevni.ColumnMetaData;
+import org.apache.trevni.ColumnFileMetaData;
+
+import org.apache.avro.Schema;
+import org.apache.avro.io.Decoder;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.generic.GenericDatumReader;
+
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/** Writes each JSON test case to a Trevni file and checks that it reads back
+ * under both the full schema and every sub-schema supplied with the case. */
+public class TestCases {
+
+  private static final File DIR = new File("src/test/cases/");
+  private static final File FILE = new File("target", "case.trv");
+
+  /** Run every non-hidden case directory found under the cases directory. */
+  @Test public void testCases() throws Exception {
+    for (File candidate : DIR.listFiles()) {
+      if (!candidate.isDirectory() || candidate.getName().startsWith("."))
+        continue;
+      runCase(candidate);
+    }
+  }
+
+  /** Write the case's input data to the shared file, then verify it against
+   * the full schema and against each sub-directory's schema and data. */
+  private void runCase(File dir) throws Exception {
+    Schema full = Schema.parse(new File(dir, "input.avsc"));
+    List<Object> expected = fromJson(full, new File(dir, "input.json"));
+
+    // store the complete data set in the column file
+    AvroColumnWriter<Object> out =
+      new AvroColumnWriter<Object>(full, new ColumnFileMetaData());
+    for (Object datum : expected)
+      out.write(datum);
+    out.writeTo(FILE);
+
+    // the file must read back unchanged under its own schema
+    checkRead(full, expected);
+
+    // each sub-directory pairs a sub-schema with the data it should yield
+    for (File sub : dir.listFiles()) {
+      if (!sub.isDirectory() || sub.getName().startsWith("."))
+        continue;
+      Schema projection = Schema.parse(new File(sub, "sub.avsc"));
+      checkRead(projection, fromJson(projection, new File(sub, "sub.json")));
+    }
+  }
+
+  /** Read the shared file with schema <code>s</code> and assert that it
+   * yields the elements of <code>data</code>, in order. */
+  private void checkRead(Schema s, List<Object> data) throws Exception {
+    AvroColumnReader.Params params =
+      new AvroColumnReader.Params(FILE).setSchema(s);
+    AvroColumnReader<Object> in = new AvroColumnReader<Object>(params);
+    try {
+      for (int i = 0; i < data.size(); i++)
+        assertEquals(data.get(i), in.next());
+    } finally {
+      in.close();
+    }
+  }
+
+  /** Decode every JSON-encoded datum in <code>file</code> using
+   * <code>schema</code>. */
+  private List<Object> fromJson(Schema schema, File file) throws Exception {
+    List<Object> parsed = new ArrayList<Object>();
+    InputStream json = new FileInputStream(file);
+    try {
+      DatumReader datumReader = new GenericDatumReader(schema);
+      Decoder jsonDecoder = DecoderFactory.get().jsonDecoder(schema, json);
+      for (;;)
+        parsed.add(datumReader.read(null, jsonDecoder));
+    } catch (EOFException endOfInput) {
+      // deliberately ignored: this exception is the only exit from the read loop
+    } finally {
+      json.close();
+    }
+    return parsed;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java
------------------------------------------------------------------------------
    svn:eol-style = native