You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2012/09/11 23:35:59 UTC
svn commit: r1383626 [1/3] - in /avro/trunk: ./ doc/src/content/xdocs/
lang/java/ lang/java/tools/
lang/java/tools/src/main/java/org/apache/avro/tool/ lang/java/trevni/
lang/java/trevni/avro/ lang/java/trevni/avro/src/
lang/java/trevni/avro/src/main/ l...
Author: cutting
Date: Tue Sep 11 21:35:56 2012
New Revision: 1383626
URL: http://svn.apache.org/viewvc?rev=1383626&view=rev
Log:
AVRO-806. Add specification of the Trevni columnar file format and a Java implementation of it.
Added:
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java (with props)
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java (with props)
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java (with props)
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java (with props)
avro/trunk/lang/java/trevni/ (with props)
avro/trunk/lang/java/trevni/avro/ (with props)
avro/trunk/lang/java/trevni/avro/pom.xml (with props)
avro/trunk/lang/java/trevni/avro/src/
avro/trunk/lang/java/trevni/avro/src/main/
avro/trunk/lang/java/trevni/avro/src/main/java/
avro/trunk/lang/java/trevni/avro/src/main/java/org/
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java (with props)
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java (with props)
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java (with props)
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java (with props)
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java (with props)
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java (with props)
avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html (with props)
avro/trunk/lang/java/trevni/avro/src/test/
avro/trunk/lang/java/trevni/avro/src/test/cases/
avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/
avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc
avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json
avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/
avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc
avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json
avro/trunk/lang/java/trevni/avro/src/test/java/
avro/trunk/lang/java/trevni/avro/src/test/java/org/
avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/
avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/
avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/
avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java (with props)
avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java (with props)
avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java (with props)
avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestWordCount.java (with props)
avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/WordCountUtil.java (with props)
avro/trunk/lang/java/trevni/checkstyle.xml (with props)
avro/trunk/lang/java/trevni/core/ (with props)
avro/trunk/lang/java/trevni/core/pom.xml (with props)
avro/trunk/lang/java/trevni/core/src/
avro/trunk/lang/java/trevni/core/src/main/
avro/trunk/lang/java/trevni/core/src/main/java/
avro/trunk/lang/java/trevni/core/src/main/java/org/
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BlockDescriptor.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Checksum.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnDescriptor.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileMetaData.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnMetaData.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnValues.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Crc32Checksum.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/DeflateCodec.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Input.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java (with props)
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html (with props)
avro/trunk/lang/java/trevni/core/src/main/java/overview.html (with props)
avro/trunk/lang/java/trevni/core/src/test/
avro/trunk/lang/java/trevni/core/src/test/java/
avro/trunk/lang/java/trevni/core/src/test/java/org/
avro/trunk/lang/java/trevni/core/src/test/java/org/apache/
avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/
avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java (with props)
avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java (with props)
avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java (with props)
avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java (with props)
avro/trunk/lang/java/trevni/doc/ (with props)
avro/trunk/lang/java/trevni/doc/apt/
avro/trunk/lang/java/trevni/doc/apt/spec.apt (with props)
avro/trunk/lang/java/trevni/doc/pom.xml (with props)
avro/trunk/lang/java/trevni/doc/resources/
avro/trunk/lang/java/trevni/doc/resources/css/
avro/trunk/lang/java/trevni/doc/resources/css/site.css (with props)
avro/trunk/lang/java/trevni/doc/site.xml (with props)
avro/trunk/lang/java/trevni/pom.xml (with props)
Modified:
avro/trunk/CHANGES.txt
avro/trunk/build.sh
avro/trunk/doc/src/content/xdocs/site.xml
avro/trunk/lang/java/pom.xml
avro/trunk/lang/java/tools/pom.xml
avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java
avro/trunk/pom.xml
Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Sep 11 21:35:56 2012
@@ -4,6 +4,9 @@ Avro 1.7.2 (unreleased)
NEW FEATURES
+ AVRO-806. Add specification of the Trevni columnar file format
+ and a Java implementation of it. (cutting)
+
IMPROVEMENTS
AVRO-1146. Java: Serialize several built-in Java classes as
Modified: avro/trunk/build.sh
URL: http://svn.apache.org/viewvc/avro/trunk/build.sh?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/build.sh (original)
+++ avro/trunk/build.sh Tue Sep 11 21:35:56 2012
@@ -94,6 +94,7 @@ case "$target" in
# build lang-specific artifacts
(cd lang/java; mvn -P dist package -DskipTests -Davro.version=$VERSION javadoc:aggregate)
+ (cd lang/java/trevni/doc; mvn site)
(mvn -N -P copy-artifacts antrun:run)
(cd lang/py; ant dist)
Modified: avro/trunk/doc/src/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/avro/trunk/doc/src/content/xdocs/site.xml?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/doc/src/content/xdocs/site.xml (original)
+++ avro/trunk/doc/src/content/xdocs/site.xml Tue Sep 11 21:35:56 2012
@@ -42,6 +42,7 @@ See http://forrest.apache.org/docs/linki
<docs label="Documentation">
<overview label="Overview" href="index.html" />
<spec label="Specification" href="spec.html" />
+ <trevni label="Trevni" href="ext:trevni/spec" />
<java-api label="Java API" href="ext:api/java/index" />
<c-api label="C API" href="ext:api/c/index" />
<cpp-api label="C++ API" href="ext:api/cpp/index" />
@@ -79,6 +80,9 @@ See http://forrest.apache.org/docs/linki
<index href="index.html" />
</java>
</api>
+ <trevni href="trevni/">
+ <spec href="spec.html"/>
+ </trevni>
</external-refs>
</site>
Modified: avro/trunk/lang/java/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/pom.xml?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/lang/java/pom.xml (original)
+++ avro/trunk/lang/java/pom.xml Tue Sep 11 21:35:56 2012
@@ -63,6 +63,7 @@
<jar-plugin.version>2.3.2</jar-plugin.version>
<javacc-plugin.version>2.6</javacc-plugin.version>
<javadoc-plugin.version>2.8</javadoc-plugin.version>
+ <maven-site-plugin.version>3.1</maven-site-plugin.version>
<plugin-plugin.version>2.9</plugin-plugin.version>
<source-plugin.version>2.1.2</source-plugin.version>
<surefire-plugin.version>2.12</surefire-plugin.version>
@@ -76,6 +77,7 @@
<module>compiler</module>
<module>maven-plugin</module>
<module>ipc</module>
+ <module>trevni</module>
<module>tools</module>
<module>mapred</module>
<module>protobuf</module>
Modified: avro/trunk/lang/java/tools/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/pom.xml?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/pom.xml (original)
+++ avro/trunk/lang/java/tools/pom.xml Tue Sep 11 21:35:56 2012
@@ -110,6 +110,28 @@
<version>${project.version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.trevni</groupId>
+ <artifactId>trevni-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.trevni</groupId>
+ <artifactId>trevni-avro</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.trevni</groupId>
+ <artifactId>trevni-core</artifactId>
+ <classifier>tests</classifier>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.trevni</groupId>
+ <artifactId>trevni-avro</artifactId>
+ <classifier>tests</classifier>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
<version>${jackson.version}</version>
Modified: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java (original)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java Tue Sep 11 21:35:56 2012
@@ -49,7 +49,10 @@ public class Main {
new RpcSendTool(),
new FromTextTool(),
new ToTextTool(),
- new TetherTool()
+ new TetherTool(),
+ new TrevniCreateRandomTool(),
+ new TrevniMetadataTool(),
+ new TrevniToJsonTool()
}) {
Tool prev = tools.put(tool.getName(), tool);
if (prev != null) {
Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.trevni.ColumnFileMetaData;
+import org.apache.trevni.avro.AvroColumnWriter;
+import org.apache.trevni.avro.RandomData;
+
+/** Tool to create a randomly populated Trevni file based on an Avro schema. */
+public class TrevniCreateRandomTool implements Tool {
+
+ @Override
+ public String getName() {
+ return "trevni_random";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Create a Trevni file filled with random instances of a schema.";
+ }
+
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+ if (args.size() != 3) {
+ err.println("Usage: schemaFile count outputFile");
+ return 1;
+ }
+
+ File schemaFile = new File(args.get(0));
+ int count = Integer.parseInt(args.get(1));
+ File outputFile = new File(args.get(2));
+
+ Schema schema = Schema.parse(schemaFile);
+
+ AvroColumnWriter<Object> writer =
+ new AvroColumnWriter<Object>(schema, new ColumnFileMetaData());
+
+ for (Object datum : new RandomData(schema, count))
+ writer.write(datum);
+
+ writer.writeTo(outputFile);
+
+ return 0;
+ }
+}
Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.trevni.Input;
+import org.apache.trevni.ColumnFileReader;
+import org.apache.trevni.MetaData;
+import org.apache.trevni.ColumnMetaData;
+
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonGenerator;
+import org.codehaus.jackson.JsonEncoding;
+import org.codehaus.jackson.util.MinimalPrettyPrinter;
+
+/** Tool to print Trevni file metadata as JSON. */
+public class TrevniMetadataTool implements Tool {
+ static final JsonFactory FACTORY = new JsonFactory();
+
+ private JsonGenerator generator;
+ private ColumnFileReader reader;
+
+ @Override
+ public String getName() {
+ return "trevni_meta";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Dumps a Trevni file's metadata as JSON.";
+ }
+
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+ String filename;
+ boolean pretty = false;
+ if (args.size() == 2 && "-pretty".equals(args.get(0))) {
+ pretty = true;
+ filename = args.get(1);
+ } else if (args.size() == 1) {
+ filename = args.get(0);
+ } else {
+ err.println("Usage: [-pretty] input");
+ return 1;
+ }
+
+ dump(TrevniUtil.input(filename), out, pretty);
+
+ return 0;
+ }
+
+ /** Read a Trevni file and print its row count, column count, file metadata and per-column metadata as a single JSON object. */
+ public void dump(Input input, PrintStream out, boolean pretty)
+ throws IOException {
+ this.generator = FACTORY.createJsonGenerator(out, JsonEncoding.UTF8);
+ if (pretty) {
+ generator.useDefaultPrettyPrinter();
+ } else { // ensure newline separation
+ MinimalPrettyPrinter pp = new MinimalPrettyPrinter();
+ pp.setRootValueSeparator(System.getProperty("line.separator"));
+ generator.setPrettyPrinter(pp);
+ }
+
+ this.reader = new ColumnFileReader(input);
+
+ generator.writeStartObject();
+ generator.writeNumberField("rowCount", reader.getRowCount());
+ generator.writeNumberField("columnCount", reader.getColumnCount());
+
+ generator.writeFieldName("metadata");
+ dump(reader.getMetaData());
+
+ generator.writeFieldName("columns");
+ generator.writeStartArray();
+ for (ColumnMetaData c : reader.getColumnMetaData())
+ dump(c);
+ generator.writeEndArray();
+
+ generator.writeEndObject();
+
+ generator.flush();
+ out.println();
+ reader.close();
+ }
+
+ private void dump(MetaData<?> meta) throws IOException {
+ generator.writeStartObject();
+ for (Map.Entry<String,byte[]> e : meta.entrySet())
+ generator.writeStringField(e.getKey(),
+ new String(e.getValue(), "ISO-8859-1"));
+ generator.writeEndObject();
+ }
+
+}
Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniMetadataTool.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.trevni.Input;
+import org.apache.trevni.ColumnFileReader;
+import org.apache.trevni.ColumnMetaData;
+import org.apache.trevni.ColumnValues;
+
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonGenerator;
+import org.codehaus.jackson.JsonEncoding;
+import org.codehaus.jackson.util.MinimalPrettyPrinter;
+
+/** Tool to read Trevni files and print them as JSON.
+ * This can read any Trevni file. Nested structure is reconstructed from the
+ * columns rather than any schema information.
+ */
+public class TrevniToJsonTool implements Tool {
+ static final JsonFactory FACTORY = new JsonFactory();
+
+ private JsonGenerator generator;
+ private ColumnFileReader reader;
+ private ColumnValues[] values;
+ private String[] shortNames;
+
+ @Override
+ public String getName() {
+ return "trevni_tojson";
+ }
+
+ @Override
+ public String getShortDescription() {
+ return "Dumps a Trevni file as JSON.";
+ }
+
+ @Override
+ public int run(InputStream stdin, PrintStream out, PrintStream err,
+ List<String> args) throws Exception {
+ String filename;
+ boolean pretty = false;
+ if (args.size() == 2 && "-pretty".equals(args.get(0))) {
+ pretty = true;
+ filename = args.get(1);
+ } else if (args.size() == 1) {
+ filename = args.get(0);
+ } else {
+ err.println("Usage: [-pretty] input");
+ return 1;
+ }
+
+ toJson(TrevniUtil.input(filename), out, pretty);
+
+ return 0;
+ }
+
+ /** Read a Trevni file and print each row as a JSON object. */
+ public void toJson(Input input, PrintStream out, boolean pretty)
+ throws IOException {
+ this.generator = FACTORY.createJsonGenerator(out, JsonEncoding.UTF8);
+ if (pretty) {
+ generator.useDefaultPrettyPrinter();
+ } else { // ensure newline separation
+ MinimalPrettyPrinter pp = new MinimalPrettyPrinter();
+ pp.setRootValueSeparator(System.getProperty("line.separator"));
+ generator.setPrettyPrinter(pp);
+ }
+
+ this.reader = new ColumnFileReader(input);
+
+ int columnCount = (int)reader.getColumnCount();
+ this.values = new ColumnValues[columnCount];
+ this.shortNames = new String[columnCount];
+ for (int i = 0; i < columnCount; i++) {
+ values[i] = reader.getValues(i);
+ shortNames[i] = shortName(reader.getColumnMetaData(i));
+ }
+
+ List<ColumnMetaData> roots = reader.getRoots();
+ for (long row = 0; row < reader.getRowCount(); row++) {
+ for (ColumnValues v : values)
+ v.startRow();
+ generator.writeStartObject();
+ for (ColumnMetaData root : roots)
+ valueToJson(root);
+ generator.writeEndObject();
+ }
+ generator.flush();
+ out.println();
+ reader.close();
+ }
+
+ private void valueToJson(ColumnMetaData column) throws IOException {
+ generator.writeFieldName(shortNames[column.getNumber()]);
+ ColumnValues in = values[column.getNumber()];
+ if (!column.isArray()) {
+ primitiveToJson(column, in.nextValue());
+ } else {
+ generator.writeStartArray();
+ int length = in.nextLength();
+ for (int i = 0; i < length; i++) {
+ Object value = in.nextValue();
+ List<ColumnMetaData> children = column.getChildren();
+ if (children.size() == 0) {
+ primitiveToJson(column, value);
+ } else {
+ generator.writeStartObject();
+ if (value != null) {
+ generator.writeFieldName("value$");
+ primitiveToJson(column, value);
+ }
+ for (ColumnMetaData child : children)
+ valueToJson(child);
+ generator.writeEndObject();
+ }
+ }
+ generator.writeEndArray();
+ }
+ }
+
+ private void primitiveToJson(ColumnMetaData column, Object value)
+ throws IOException {
+ switch (column.getType()) {
+ case NULL:
+ generator.writeNull(); break;
+ case INT:
+ generator.writeNumber((Integer)value); break;
+ case LONG:
+ generator.writeNumber((Long)value); break;
+ case FIXED32:
+ generator.writeNumber((Integer)value); break;
+ case FIXED64:
+ generator.writeNumber((Long)value); break;
+ case FLOAT:
+ generator.writeNumber((Float)value); break;
+ case DOUBLE:
+ generator.writeNumber((Double)value); break;
+ case STRING:
+ generator.writeString((String)value); break;
+ case BYTES:
+ generator.writeBinary((byte[])value);
+ break;
+ default:
+ throw new RuntimeException("Unknown value type: "+column.getType());
+ }
+ }
+
+ // trim off the portion of the name shared with the parent, then drop one leading non-alphanumeric character if present
+ private String shortName(ColumnMetaData column) {
+ String name = column.getName();
+ ColumnMetaData parent = column.getParent();
+ if (parent != null && name.startsWith(parent.getName()))
+ name = name.substring(parent.getName().length());
+ if (!Character.isLetterOrDigit(name.charAt(0)))
+ name = name.substring(1);
+ return name;
+ }
+
+}
Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniToJsonTool.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.InputStream;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.URI;
+
+import org.apache.trevni.Input;
+import org.apache.trevni.avro.HadoopInput;
+import org.apache.trevni.InputFile;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/** Static utility methods for tools. */
+class TrevniUtil {
+
+ static Input input(String filename) throws IOException {
+ if (filename.startsWith("hdfs://")) {
+ return new HadoopInput(new Path(filename), new Configuration());
+ } else {
+ return new InputFile(new File(filename));
+ }
+ }
+
+ /**
+ * Returns stdin if filename is "-", else opens the local or HDFS file
+ * and returns an InputStream for it.
+ * @throws IOException
+ */
+ static InputStream input(String filename, InputStream stdin)
+ throws IOException {
+ if (filename.equals("-"))
+ return new BufferedInputStream(stdin);
+ else if (filename.startsWith("hdfs://")) {
+ FileSystem fs = FileSystem.get(URI.create(filename), new Configuration());
+ return new BufferedInputStream(fs.open(new Path(filename)));
+ } else {
+ return new BufferedInputStream(new FileInputStream(new File(filename)));
+ }
+ }
+
+ /**
+ * Returns stdout if filename is "-", else opens the local or HDFS file
+ * and returns an OutputStream for it.
+ * @throws IOException
+ */
+ static OutputStream output(String filename, OutputStream stdout)
+ throws IOException {
+ if (filename.equals("-"))
+ return new BufferedOutputStream(stdout);
+ else if (filename.startsWith("hdfs://")) {
+ FileSystem fs = FileSystem.get(URI.create(filename), new Configuration());
+ return new BufferedOutputStream(fs.create(new Path(filename)));
+ } else {
+ return new BufferedOutputStream(new FileOutputStream(new File(filename)));
+ }
+ }
+
+}
Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniUtil.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: avro/trunk/lang/java/trevni/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Sep 11 21:35:56 2012
@@ -0,0 +1 @@
+target
Propchange: avro/trunk/lang/java/trevni/avro/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Sep 11 21:35:56 2012
@@ -0,0 +1 @@
+target
Added: avro/trunk/lang/java/trevni/avro/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/pom.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/pom.xml (added)
+++ avro/trunk/lang/java/trevni/avro/pom.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>trevni-java</artifactId>
+ <groupId>org.apache.trevni</groupId>
+ <version>1.7.2-SNAPSHOT</version>
+ <relativePath>../</relativePath>
+ </parent>
+
+ <artifactId>trevni-avro</artifactId>
+ <name>Trevni Java Avro</name>
+ <url>http://avro.apache.org/</url>
+ <description>Trevni Java Avro</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>trevni-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>trevni-core</artifactId>
+ <classifier>tests</classifier>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>${junit.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro-mapred</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+ <profiles>
+ </profiles>
+
+</project>
+
Propchange: avro/trunk/lang/java/trevni/avro/pom.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+import java.io.Closeable;
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.trevni.ColumnMetaData;
+import org.apache.trevni.ColumnFileReader;
+import org.apache.trevni.ColumnValues;
+import org.apache.trevni.Input;
+import org.apache.trevni.InputFile;
+import org.apache.trevni.TrevniRuntimeException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Field;
+import org.apache.avro.generic.GenericData;
+
+import static org.apache.trevni.avro.AvroColumnator.isSimple;
+
+/** Read files written with {@link AvroColumnWriter}. A subset of the schema
+ * used for writing may be specified when reading. In this case only columns
+ * of the subset schema are read. */
+public class AvroColumnReader<D>
+ implements Iterator<D>, Iterable<D>, Closeable {
+
+ private ColumnFileReader reader;
+ private GenericData model;
+ private Schema fileSchema;
+ private Schema readSchema;
+
+ private ColumnValues[] values;
+ private int[] arrayWidths;
+ private int column; // current index in values
+
+ /** Parameters for reading an Avro column file. */
+ public static class Params {
+ Input input;
+ Schema schema;
+ GenericData model = GenericData.get();
+
+ /** Construct reading from a file. */
+ public Params(File file) throws IOException {
+ this(new InputFile(file));
+ }
+
+ /** Construct reading from input. */
+ public Params(Input input) { this.input = input; }
+
+ /** Set subset schema to project data down to. */
+ public Params setSchema(Schema schema) {
+ this.schema = schema;
+ return this;
+ }
+
+ /** Set data representation. */
+ public Params setModel(GenericData model) {
+ this.model = model;
+ return this;
+ }
+ }
+
+ /** Construct a reader for a file. */
+ public AvroColumnReader(Params params)
+ throws IOException {
+ this.reader = new ColumnFileReader(params.input);
+ this.model = params.model;
+ this.fileSchema =
+ Schema.parse(reader.getMetaData().getString(AvroColumnWriter.SCHEMA_KEY));
+ this.readSchema = params.schema == null ? fileSchema : params.schema;
+ initialize();
+ }
+
+ /** Return the schema for data in this file. */
+ public Schema getFileSchema() { return fileSchema; }
+
+ void initialize() throws IOException {
+ // compute a mapping from column name to number for file
+ Map<String,Integer> fileColumnNumbers = new HashMap<String,Integer>();
+ int i = 0;
+ for (ColumnMetaData c : new AvroColumnator(fileSchema).getColumns())
+ fileColumnNumbers.put(c.getName(), i++);
+
+ // create iterator for each column in readSchema
+ AvroColumnator readColumnator = new AvroColumnator(readSchema);
+ this.arrayWidths = readColumnator.getArrayWidths();
+ ColumnMetaData[] readColumns = readColumnator.getColumns();
+ this.values = new ColumnValues[readColumns.length];
+ int j = 0;
+ for (ColumnMetaData c : readColumns) {
+ Integer n = fileColumnNumbers.get(c.getName());
+ if (n == null)
+ throw new TrevniRuntimeException("No column named: "+c.getName());
+ values[j++] = reader.getValues(n);
+ }
+ }
+
+ @Override
+ public Iterator<D> iterator() { return this; }
+
+ @Override
+ public boolean hasNext() {
+ return values[0].hasNext();
+ }
+
+ /** Return the number of rows in this file. */
+ public long getRowCount() { return reader.getRowCount(); }
+
+ @Override
+ public D next() {
+ try {
+ for (int i = 0; i < values.length; i++)
+ values[i].startRow();
+ this.column = 0;
+ return (D)read(readSchema);
+ } catch (IOException e) {
+ throw new TrevniRuntimeException(e);
+ }
+ }
+
+ private Object read(Schema s) throws IOException {
+ if (isSimple(s))
+ return nextValue(s, column++);
+
+ final int startColumn = column;
+
+ switch (s.getType()) {
+ case MAP:
+ int size = values[column].nextLength();
+ Map map = (Map)new HashMap(size);
+ for (int i = 0; i < size; i++) {
+ this.column = startColumn;
+ values[column++].nextValue(); // null in parent
+ String key = (String)values[column++].nextValue(); // key
+ map.put(key, read(s.getValueType())); // value
+ }
+ column = startColumn + arrayWidths[startColumn];
+ return map;
+ case RECORD:
+ Object record = model.newRecord(null, s);
+ for (Field f : s.getFields())
+ model.setField(record, f.name(), f.pos(), read(f.schema()));
+ return record;
+ case ARRAY:
+ int length = values[column].nextLength();
+ List elements = (List)new GenericData.Array(length, s);
+ for (int i = 0; i < length; i++) {
+ this.column = startColumn;
+ Object value = nextValue(s, column++);
+ if (!isSimple(s.getElementType()))
+ value = read(s.getElementType());
+ elements.add(value);
+ }
+ column = startColumn + arrayWidths[startColumn];
+ return elements;
+ case UNION:
+ Object value = null;
+ for (Schema branch : s.getTypes()) {
+ if (branch.getType() == Schema.Type.NULL) continue;
+ if (values[column].nextLength() == 1) {
+ value = nextValue(s, column);
+ column++;
+ if (!isSimple(branch))
+ value = read(branch);
+ } else {
+ column += arrayWidths[column];
+ }
+ }
+ return value;
+ default:
+ throw new TrevniRuntimeException("Unknown schema: "+s);
+ }
+ }
+
+ private Object nextValue(Schema s, int column) throws IOException {
+ Object v = values[column].nextValue();
+
+ switch (s.getType()) {
+ case ENUM:
+ return new GenericData.EnumSymbol(s, s.getEnumSymbols().get((Integer)v));
+ case FIXED:
+ return new GenericData.Fixed(s, ((ByteBuffer)v).array());
+ }
+
+ return v;
+ }
+
+ @Override
+ public void remove() { throw new UnsupportedOperationException(); }
+
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
+
+}
Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnReader.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+import java.io.File;
+import java.io.OutputStream;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.trevni.ColumnFileMetaData;
+import org.apache.trevni.ColumnFileWriter;
+import org.apache.trevni.TrevniRuntimeException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Field;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericFixed;
+import org.apache.avro.util.Utf8;
+
+import static org.apache.trevni.avro.AvroColumnator.isSimple;
+
+ /** Shreds Avro data into the columns of a Trevni column file.
+  *
+  * <p>Every primitive-typed part of the schema gets a column of its own.
+  *
+  * <p>Rows are buffered until one of the {@code writeTo} methods is called.
+  * {@link #sizeEstimate()} reports both the amount of data buffered so far
+  * and the size of the file that would be written.
+  */
+ public class AvroColumnWriter<D> {
+   public static final String SCHEMA_KEY = "avro.schema";
+
+   private Schema schema;
+   private GenericData model;
+   private ColumnFileWriter writer;
+   private int[] arrayWidths;
+
+   /** Create a writer that uses the default generic data model. */
+   public AvroColumnWriter(Schema s, ColumnFileMetaData meta)
+     throws IOException {
+     this(s, meta, GenericData.get());
+   }
+
+   /** Create a writer for the given schema, file metadata and data model.
+    * The schema's JSON text is stored in the metadata under
+    * {@link #SCHEMA_KEY}. */
+   public AvroColumnWriter(Schema s, ColumnFileMetaData meta, GenericData model)
+     throws IOException {
+     this.schema = s;
+     final AvroColumnator layout = new AvroColumnator(s);
+     meta.set(SCHEMA_KEY, s.toString());           // keep the schema with the file
+     this.writer = new ColumnFileWriter(meta, layout.getColumns());
+     this.arrayWidths = layout.getArrayWidths();
+     this.model = model;
+   }
+
+   /** Approximate size of the file that will be written; tries to err on the
+    * high side.  This is both the in-memory size of the buffered data and
+    * the size of the file produced by {@link #writeTo(OutputStream)}. */
+   public long sizeEstimate() {
+     return writer.sizeEstimate();
+   }
+
+   /** Write every row added so far to the given stream. */
+   public void writeTo(OutputStream out) throws IOException {
+     writer.writeTo(out);
+   }
+
+   /** Write every row added so far to the given file. */
+   public void writeTo(File file) throws IOException {
+     writer.writeTo(file);
+   }
+
+   /** Append one row. */
+   public void write(D value) throws IOException {
+     writer.startRow();
+     final int columnsWritten = write(value, schema, 0);
+     assert columnsWritten == writer.getColumnCount();
+     writer.endRow();
+   }
+
+   /** Write {@code o}, described by {@code s}, beginning at {@code column}.
+    * @return the index of the first column after those occupied by s */
+   private int write(Object o, Schema s, int column) throws IOException {
+     if (isSimple(s)) {
+       writeValue(o, s, column);
+       return column + 1;
+     }
+     switch (s.getType()) {
+     case MAP:
+       return writeMap((Map<?,?>)o, s, column);
+     case RECORD:
+       return writeRecord(o, s, column);
+     case ARRAY:
+       return writeArray((Collection<?>)o, s, column);
+     case UNION:
+       return writeUnion(o, s, column);
+     default:
+       throw new TrevniRuntimeException("Unknown schema: "+s);
+     }
+   }
+
+   /** Map: a length in the parent column, then per entry a null in the
+    * parent column, the key in the next column and the value after that. */
+   private int writeMap(Map<?,?> entries, Schema s, int column)
+     throws IOException {
+     writer.writeLength(entries.size(), column);
+     for (Map.Entry<?,?> entry : entries.entrySet()) {
+       writer.writeValue(null, column);
+       writer.writeValue(entry.getKey(), column + 1);
+       final int next = write(entry.getValue(), s.getValueType(), column + 2);
+       assert next == column + arrayWidths[column];
+     }
+     return column + arrayWidths[column];
+   }
+
+   /** Record: fields are written one after another into consecutive columns. */
+   private int writeRecord(Object record, Schema s, int column)
+     throws IOException {
+     int next = column;
+     for (Field field : s.getFields()) {
+       final Object fieldValue =
+         model.getField(record, field.name(), field.pos());
+       next = write(fieldValue, field.schema(), next);
+     }
+     return next;
+   }
+
+   /** Array: a length, then the elements.  Simple elements share the length
+    * column; complex elements put a null there and use the columns after. */
+   private int writeArray(Collection<?> elements, Schema s, int column)
+     throws IOException {
+     final Schema elementType = s.getElementType();
+     writer.writeLength(elements.size(), column);
+     if (!isSimple(elementType)) {
+       for (Object element : elements) {
+         writer.writeValue(null, column);
+         final int next = write(element, elementType, column + 1);
+         assert next == column + arrayWidths[column];
+       }
+       return column + arrayWidths[column];
+     }
+     for (Object element : elements) {             // simple-array shortcut
+       writeValue(element, elementType, column);
+     }
+     return column + 1;
+   }
+
+   /** Union: each non-null branch gets length 1 if it is the branch the
+    * datum resolves to, and length 0 otherwise. */
+   private int writeUnion(Object o, Schema s, int column) throws IOException {
+     final int chosen = model.resolveUnion(s, o);
+     int index = 0;
+     int next = column;
+     for (Schema branch : s.getTypes()) {
+       final boolean isChosen = (index == chosen);
+       index++;
+       if (branch.getType() == Schema.Type.NULL) {
+         continue;                                 // null branch has no column
+       }
+       if (isChosen) {
+         writer.writeLength(1, next);
+         if (isSimple(branch)) {
+           writeValue(o, branch, next);
+           next++;
+         } else {
+           writer.writeValue(null, next);
+           next = write(o, branch, next + 1);
+         }
+       } else {
+         writer.writeLength(0, next);
+         next += arrayWidths[next];
+       }
+     }
+     return next;
+   }
+
+   /** Write one simple value, first converting Utf8 strings to String, enums
+    * to their ordinal and fixed values to their bytes. */
+   private void writeValue(Object value, Schema s, int column)
+     throws IOException {
+     Object stored = value;
+     final Schema.Type type = s.getType();
+     if (type == Schema.Type.STRING) {
+       if (stored instanceof Utf8) {
+         stored = stored.toString();
+       }
+     } else if (type == Schema.Type.ENUM) {
+       if (stored instanceof Enum) {
+         stored = ((Enum<?>)stored).ordinal();
+       } else {
+         stored = s.getEnumOrdinal(stored.toString());
+       }
+     } else if (type == Schema.Type.FIXED) {
+       stored = ((GenericFixed)stored).bytes();
+     }
+     writer.writeValue(stored, column);
+   }
+
+ }
Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnWriter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni.avro;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.IdentityHashMap;
+
+import org.apache.trevni.ColumnMetaData;
+import org.apache.trevni.ValueType;
+import org.apache.trevni.TrevniRuntimeException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Field;
+
+ /** Utility that computes the column layout of a schema. */
+ class AvroColumnator {
+
+   private Schema schema;
+
+   private List<ColumnMetaData> columns = new ArrayList<ColumnMetaData>();
+   private List<Integer> arrayWidths = new ArrayList<Integer>();
+
+   /** Compute the layout for a schema.
+    * @throws TrevniRuntimeException if the schema is recursive or contains a
+    * type this class does not handle */
+   public AvroColumnator(Schema schema) {
+     this.schema = schema;
+     columnize(null, schema, null, false);
+   }
+
+   /** Return columns for the schema. */
+   public ColumnMetaData[] getColumns() {
+     return columns.toArray(new ColumnMetaData[columns.size()]);
+   }
+
+   /** Return array giving, for each column, the number of columns it spans:
+    * the column itself plus the descendant columns immediately following it.
+    * Adding a column's width to its index gives the index of the first
+    * column after it and its descendants. */
+   public int[] getArrayWidths() {
+     int[] result = new int[arrayWidths.size()];
+     int i = 0;
+     for (Integer width : arrayWidths)
+       result[i++] = width;
+     return result;
+   }
+
+   // Schemas currently being columnized, i.e. the ancestors of the schema in
+   // hand.  Compared by identity.
+   private Map<Schema,Schema> seen = new IdentityHashMap<Schema,Schema>();
+
+   /** Append the columns for schema {@code s} located at {@code path}. */
+   private void columnize(String path, Schema s,
+                          ColumnMetaData parent, boolean isArray) {
+
+     if (isSimple(s)) {
+       if (path == null) path = s.getFullName();
+       addColumn(path, simpleValueType(s), parent, isArray);
+       return;
+     }
+
+     if (seen.containsKey(s))                      // catch recursion
+       throw new TrevniRuntimeException("Cannot shred recursive schemas: "+s);
+     seen.put(s, s);
+
+     switch (s.getType()) {
+     case MAP:
+       path = path == null ? ">" : path+">";
+       int start = columns.size();
+       ColumnMetaData p = addColumn(path, ValueType.NULL, parent, true);
+       addColumn(p(path,"key", ""), ValueType.STRING, p, false);
+       columnize(p(path,"value", ""), s.getValueType(), p, false);
+       arrayWidths.set(start, columns.size()-start); // fixup with actual width
+       break;
+     case RECORD:
+       for (Field field : s.getFields())           // flatten fields to columns
+         columnize(p(path, field.name(), "#"), field.schema(), parent, isArray);
+       break;
+     case ARRAY:
+       path = path == null ? "[]" : path+"[]";
+       addArrayColumn(path, s.getElementType(), parent);
+       break;
+     case UNION:
+       for (Schema branch : s.getTypes())          // array per non-null branch
+         if (branch.getType() != Schema.Type.NULL)
+           addArrayColumn(p(path, branch, "/"), branch, parent);
+       break;
+     default:
+       throw new TrevniRuntimeException("Unknown schema: "+s);
+     }
+
+     // Done with s: forget it, so that the same schema instance may appear
+     // again in a sibling position without being mistaken for recursion.
+     seen.remove(s);
+   }
+
+   /** Extend a path with a union branch's name; a nested union adds nothing. */
+   private String p(String parent, Schema child, String sep) {
+     if (child.getType() == Schema.Type.UNION)
+       return parent;
+     return p(parent, child.getFullName(), sep);
+   }
+
+   /** Join parent and child with sep, or return child when parent is null. */
+   private String p(String parent, String child, String sep) {
+     return parent == null ? child : parent + sep + child;
+   }
+
+   /** Append a column and a width of one for it, to be fixed up by the caller
+    * when the column turns out to have descendants. */
+   private ColumnMetaData addColumn(String path, ValueType type,
+                                    ColumnMetaData parent, boolean isArray) {
+     ColumnMetaData column = new ColumnMetaData(path, type);
+     if (parent != null)
+       column.setParent(parent);
+     column.isArray(isArray);
+     columns.add(column);
+     arrayWidths.add(1);                           // placeholder
+     return column;
+   }
+
+   /** Append the column(s) for a repeated value with the given element. */
+   private void addArrayColumn(String path, Schema element,
+                               ColumnMetaData parent) {
+     if (path == null) path = element.getFullName();
+     if (isSimple(element)) {                      // optimize simple arrays
+       addColumn(path, simpleValueType(element), parent, true);
+       return;
+     }
+     // complex array: insert a parent column with lengths
+     int start = columns.size();
+     ColumnMetaData array = addColumn(path, ValueType.NULL, parent, true);
+     columnize(path, element, array, false);
+     arrayWidths.set(start, columns.size()-start); // fixup with actual width
+   }
+
+   /** Return true if values of this schema are stored in a single column. */
+   static boolean isSimple(Schema s) {
+     switch (s.getType()) {
+     case NULL:
+     case INT: case LONG:
+     case FLOAT: case DOUBLE:
+     case BYTES: case STRING:
+     case ENUM: case FIXED:
+       return true;
+     default:
+       return false;
+     }
+   }
+
+   /** Return the column value type used to store a simple schema: enums are
+    * stored as ints and fixed values as bytes. */
+   private ValueType simpleValueType(Schema s) {
+     switch (s.getType()) {
+     case NULL: return ValueType.NULL;
+     case INT: return ValueType.INT;
+     case LONG: return ValueType.LONG;
+     case FLOAT: return ValueType.FLOAT;
+     case DOUBLE: return ValueType.DOUBLE;
+     case BYTES: return ValueType.BYTES;
+     case STRING: return ValueType.STRING;
+     case ENUM: return ValueType.INT;
+     case FIXED: return ValueType.BYTES;
+     default:
+       throw new TrevniRuntimeException("Unknown schema: "+s);
+     }
+   }
+
+ }
Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroColumnator.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RecordReader;
+
+import org.apache.avro.reflect.ReflectData;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroWrapper;
+
+ /** An {@link org.apache.hadoop.mapred.InputFormat} for Trevni files.
+  *
+  * <p>A subset schema to be read may be specified with {@link
+  * AvroJob#setInputSchema(JobConf, org.apache.avro.Schema)}.
+  */
+ public class AvroTrevniInputFormat<T>
+   extends FileInputFormat<AvroWrapper<T>, NullWritable> {
+
+   /** Files are never split: each one is read whole by a single reader. */
+   @Override
+   protected boolean isSplitable(FileSystem fs, Path filename) {
+     return false;
+   }
+
+   /** List input files, keeping only those whose name ends with {@link
+    * AvroTrevniOutputFormat#EXT}.  Sets "mapred.input.dir.recursive" on the
+    * job before listing. */
+   @Override
+   protected FileStatus[] listStatus(JobConf job) throws IOException {
+     List<FileStatus> result = new ArrayList<FileStatus>();
+     job.setBoolean("mapred.input.dir.recursive", true);
+     for (FileStatus file : super.listStatus(job))
+       if (file.getPath().getName().endsWith(AvroTrevniOutputFormat.EXT))
+         result.add(file);
+     return result.toArray(new FileStatus[0]);
+   }
+
+   /** Create a reader over the file of the given split.  Data is read with
+    * the reflect data model, projected to the job's input schema when one
+    * is configured. */
+   @Override
+   public RecordReader<AvroWrapper<T>, NullWritable>
+     getRecordReader(InputSplit split, final JobConf job,
+                     Reporter reporter) throws IOException {
+     final FileSplit file = (FileSplit)split;
+     reporter.setStatus(file.toString());
+
+     final AvroColumnReader.Params params =
+       new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
+     params.setModel(ReflectData.get());
+     if (job.get(AvroJob.INPUT_SCHEMA) != null)
+       params.setSchema(AvroJob.getInputSchema(job));
+
+     return new RecordReader<AvroWrapper<T>, NullWritable>() {
+       private AvroColumnReader<T> reader = new AvroColumnReader<T>(params);
+       private float rows = reader.getRowCount();  // float, for the division
+       private long row;                           // rows returned so far
+
+       public AvroWrapper<T> createKey() { return new AvroWrapper<T>(null); }
+
+       public NullWritable createValue() { return NullWritable.get(); }
+
+       public boolean next(AvroWrapper<T> wrapper, NullWritable ignore)
+         throws IOException {
+         if (!reader.hasNext())
+           return false;
+         wrapper.datum(reader.next());
+         row++;
+         return true;
+       }
+
+       public float getProgress() throws IOException {
+         // an empty file would otherwise yield 0/0, i.e. NaN
+         return rows == 0 ? 0.0f : row / rows;
+       }
+
+       public long getPos() throws IOException { return row; }
+
+       public void close() throws IOException { reader.close(); }
+
+     };
+
+   }
+
+ }
+
Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniInputFormat.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Map;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.util.Progressable;
+
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.AvroWrapper;
+
+import org.apache.trevni.MetaData;
+import org.apache.trevni.ColumnFileMetaData;
+
+ /** An {@link org.apache.hadoop.mapred.OutputFormat} that writes Avro data to
+  * Trevni files.
+  *
+  * <p>Writes a directory of files per task, each comprising a single filesystem
+  * block.  To reduce the number of files, increase the default filesystem block
+  * size for the job.  Each task also requires enough memory to buffer a
+  * filesystem block.
+  */
+ public class AvroTrevniOutputFormat <T>
+   extends FileOutputFormat<AvroWrapper<T>, NullWritable> {
+
+   /** The file name extension for trevni files. */
+   public final static String EXT = ".trv";
+
+   /** Prefix of the job configuration keys that hold file metadata. */
+   public static final String META_PREFIX = "trevni.meta.";
+
+   /** Add metadata to job output files.*/
+   public static void setMeta(JobConf job, String key, String value) {
+     job.set(META_PREFIX+key, value);
+   }
+
+   /** Create a writer that buffers rows and starts a new "part-N" file in the
+    * task's output directory each time the buffered size estimate reaches the
+    * filesystem's default block size, and once more on close.
+    * @throws IOException if the task output directory cannot be created */
+   @Override
+   public RecordWriter<AvroWrapper<T>, NullWritable>
+     getRecordWriter(FileSystem ignore, final JobConf job,
+                     final String name, Progressable prog)
+     throws IOException {
+
+     // a map-only job writes the map output schema
+     boolean isMapOnly = job.getNumReduceTasks() == 0;
+     final Schema schema = isMapOnly
+       ? AvroJob.getMapOutputSchema(job)
+       : AvroJob.getOutputSchema(job);
+
+     // copy job entries set through setMeta() into the file metadata,
+     // stripping the prefix that setMeta() added
+     final ColumnFileMetaData meta = new ColumnFileMetaData();
+     for (Map.Entry<String,String> e : job)
+       if (e.getKey().startsWith(META_PREFIX))
+         meta.put(e.getKey().substring(META_PREFIX.length()),
+                  e.getValue().getBytes(MetaData.UTF8));
+
+     final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
+     final FileSystem fs = dir.getFileSystem(job);
+     if (!fs.mkdirs(dir))
+       throw new IOException("Failed to create directory: " + dir);
+     final long blockSize = fs.getDefaultBlockSize();
+
+     return new RecordWriter<AvroWrapper<T>, NullWritable>() {
+       private int part = 0;                       // number of the next file
+
+       private AvroColumnWriter<T> writer =
+         new AvroColumnWriter<T>(schema, meta, ReflectData.get());
+
+       /** Write buffered rows to the next part file and start a new buffer. */
+       private void flush() throws IOException {
+         OutputStream out = fs.create(new Path(dir, "part-"+(part++)+EXT));
+         try {
+           writer.writeTo(out);
+         } finally {
+           out.close();
+         }
+         writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
+       }
+
+       public void write(AvroWrapper<T> wrapper, NullWritable ignore)
+         throws IOException {
+         writer.write(wrapper.datum());
+         if (writer.sizeEstimate() >= blockSize)   // block full
+           flush();
+       }
+       public void close(Reporter reporter) throws IOException {
+         flush();
+       }
+     };
+   }
+
+ }
Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/AvroTrevniOutputFormat.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FSDataInputStream;
+
+import org.apache.trevni.Input;
+
+/** Adapt a Hadoop {@link FSDataInputStream} to Trevni's {@link Input}. */
+public class HadoopInput implements Input {
+ private final FSDataInputStream stream;
+ private final long len;
+
+ /** Construct given a path and a configuration. */
+ public HadoopInput(Path path, Configuration conf) throws IOException {
+ this.stream = path.getFileSystem(conf).open(path);
+ this.len = path.getFileSystem(conf).getFileStatus(path).getLen();
+ }
+
+ @Override public long length() {
+ return len;
+ }
+
+ @Override public int read(long p, byte[] b, int s, int l) throws IOException {
+ return stream.read(p, b, s, l);
+ }
+
+ @Override public void close() throws IOException {
+ stream.close();
+ }
+}
Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/HadoopInput.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html (added)
+++ avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html Tue Sep 11 21:35:56 2012
@@ -0,0 +1,38 @@
+<html>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<body>Read and write <a href="http://avro.apache.org/">Avro</a> data
+in Trevni column files.
+
+<h2>Limitations</h2>
+
+The current implementation does not correctly handle all Avro data.
+In particular:
+
+<ul>
+ <li>Recursive types are not supported.</li>
+ <li>With ReflectData, fields of Java type <b>byte</b>, <b>short</b>
+ and <b>char</b> are not supported. Instead use int. </li>
+ <li>With ReflectData, Java arrays are not supported. Instead use
+ List. </li>
+ <li>An <b>enum</b> is always read as a GenericData.EnumSymbol, even
+ when SpecificData or ReflectData is used. </li>
+</ul>
+</body>
+</html>
Propchange: avro/trunk/lang/java/trevni/avro/src/main/java/org/apache/trevni/avro/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.avsc Tue Sep 11 21:35:56 2012
@@ -0,0 +1,78 @@
+{
+ "type": "record",
+ "name": "Document",
+ "fields": [
+ {
+ "name": "DocId",
+ "type": "long"
+ },
+ {
+ "name": "Links",
+ "type": [
+ "null",
+ {
+ "name": "Links",
+ "type": "record",
+ "fields": [
+ {
+ "name": "Backward",
+ "type": {
+ "type": "array",
+ "items": "long"
+ }
+ },
+ {
+ "name": "Forward",
+ "type": {
+ "type": "array",
+ "items": "long"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "Name",
+ "type": {
+ "type": "array",
+ "items": {
+ "name": "Name",
+ "type": "record",
+ "fields": [
+ {
+ "name": "Language",
+ "type": {
+ "type": "array",
+ "items": {
+ "name": "Language",
+ "type": "record",
+ "fields": [
+ {
+ "name": "Code",
+ "type": "string"
+ },
+ {
+ "name": "Country",
+ "type": [
+ "null",
+ "string"
+ ]
+ }
+ ]
+ }
+ }
+ },
+ {
+ "name": "Url",
+ "type": [
+ "null",
+ "string"
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ]
+}
Added: avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/input.json Tue Sep 11 21:35:56 2012
@@ -0,0 +1,73 @@
+{
+ "DocId": 10,
+ "Links": {
+ "Links": {
+ "Backward": [],
+ "Forward": [
+ 20,
+ 40,
+ 60
+ ]
+ }
+ },
+ "Name": [
+ {
+ "Language": [
+ {
+ "Code": "en-us",
+ "Country": {
+ "string": "us"
+ }
+ },
+ {
+ "Code": "en",
+ "Country": null
+ }
+ ],
+ "Url": {
+ "string": "http://A"
+ }
+ },
+ {
+ "Language": [],
+ "Url": {
+ "string": "http://B"
+ }
+ },
+ {
+ "Language": [
+ {
+ "Code": "en-gb",
+ "Country": {
+ "string": "gb"
+ }
+ }
+ ],
+ "Url": null
+ }
+ ]
+}
+{
+ "DocId": 20,
+ "Links": {
+ "Links": {
+ "Backward": [
+ 10,
+ 30
+ ],
+ "Forward": [
+ 80
+ ]
+ }
+ },
+ "Name": [
+ {
+ "Language": [
+
+ ],
+ "Url": {
+ "string": "http://C"
+ }
+ }
+ ]
+}
Added: avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.avsc Tue Sep 11 21:35:56 2012
@@ -0,0 +1,41 @@
+{
+ "type": "record",
+ "name": "Document",
+ "fields": [
+ {
+ "name": "DocId",
+ "type": "long"
+ },
+ {
+ "name": "Name",
+ "type": {
+ "type": "array",
+ "items": {
+ "name": "Name",
+ "type": "record",
+ "fields": [
+ {
+ "name": "Language",
+ "type": {
+ "type": "array",
+ "items": {
+ "name": "Language",
+ "type": "record",
+ "fields": [
+ {
+ "name": "Country",
+ "type": [
+ "null",
+ "string"
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ }
+ }
+ ]
+}
Added: avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/cases/dremel/sub1/sub.json Tue Sep 11 21:35:56 2012
@@ -0,0 +1,37 @@
+{
+ "DocId": 10,
+ "Name": [
+ {
+ "Language": [
+ {
+ "Country": {
+ "string": "us"
+ }
+ },
+ {
+ "Country": null
+ }
+ ]
+ },
+ {
+ "Language": []
+ },
+ {
+ "Language": [
+ {
+ "Country": {
+ "string": "gb"
+ }
+ }
+ ]
+ }
+ ]
+}
+{
+ "DocId": 20,
+ "Name": [
+ {
+ "Language": []
+ }
+ ]
+}
Added: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni.avro;
+
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericArray;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+
+import org.apache.trevni.TestUtil;
+
+/**
+ * Generates schema data as Java objects with random values.
+ *
+ * <p>Each call to {@link #iterator()} returns a new iterator that yields
+ * {@code count} datums generated from the root schema.
+ */
+public class RandomData implements Iterable<Object> {
+  private final Schema root;
+  private final int count;
+
+  /**
+   * @param schema the schema each generated datum is built from
+   * @param count the number of datums each iterator produces
+   */
+  public RandomData(Schema schema, int count) {
+    this.root = schema;
+    this.count = count;
+  }
+
+  /**
+   * Returns an iterator over {@code count} generated datums. Removal is
+   * not supported.
+   */
+  public Iterator<Object> iterator() {
+    return new Iterator<Object>() {
+      private int n;
+      private Random random = TestUtil.createRandom();
+      public boolean hasNext() { return n < count; }
+      public Object next() {
+        // Honor the Iterator contract instead of generating data past count.
+        if (!hasNext())
+          throw new java.util.NoSuchElementException();
+        n++;
+        return generate(root, random, 0);
+      }
+      public void remove() { throw new UnsupportedOperationException(); }
+    };
+  }
+
+  /**
+   * Builds one random datum for the given schema.
+   *
+   * @param schema the schema to generate a value for
+   * @param random the source of randomness
+   * @param d the current nesting depth; array and map lengths are reduced
+   *        by this amount, so deeply nested collections come out empty
+   * @return the generated value
+   * @throws RuntimeException if the schema type is not handled
+   */
+  @SuppressWarnings(value="unchecked")
+  private static Object generate(Schema schema, Random random, int d) {
+    switch (schema.getType()) {
+    case RECORD:
+      GenericRecord record = new GenericData.Record(schema);
+      for (Schema.Field field : schema.getFields())
+        record.put(field.name(), generate(field.schema(), random, d+1));
+      return record;
+    case ENUM:
+      List<String> symbols = schema.getEnumSymbols();
+      return new GenericData.EnumSymbol
+        (schema, symbols.get(random.nextInt(symbols.size())));
+    case ARRAY:
+      // Clamp to zero: the depth penalty can push the raw length negative.
+      int length = Math.max(0, (random.nextInt(5)+2)-d);
+      GenericArray<Object> array =
+        new GenericData.Array<Object>(length, schema);
+      for (int i = 0; i < length; i++)
+        array.add(generate(schema.getElementType(), random, d+1));
+      return array;
+    case MAP:
+      length = Math.max(0, (random.nextInt(5)+2)-d);
+      Map<Object,Object> map = new HashMap<Object,Object>(length);
+      for (int i = 0; i < length; i++) {
+        map.put(TestUtil.randomString(random),
+                generate(schema.getValueType(), random, d+1));
+      }
+      return map;
+    case UNION:
+      // Pick one branch at random; a union does not add a nesting level.
+      List<Schema> types = schema.getTypes();
+      return generate(types.get(random.nextInt(types.size())), random, d);
+    case FIXED:
+      byte[] bytes = new byte[schema.getFixedSize()];
+      random.nextBytes(bytes);
+      return new GenericData.Fixed(schema, bytes);
+    case STRING:  return TestUtil.randomString(random);
+    case BYTES:   return TestUtil.randomBytes(random);
+    case INT:     return random.nextInt();
+    case LONG:    return random.nextLong();
+    case FLOAT:   return random.nextFloat();
+    case DOUBLE:  return random.nextDouble();
+    case BOOLEAN: return random.nextBoolean();
+    case NULL:    return null;
+    default: throw new RuntimeException("Unknown type: "+schema);
+    }
+  }
+
+  /**
+   * Writes {@code count} random datums for a schema to an Avro data file.
+   *
+   * @param args schema file, output file and datum count, in that order
+   */
+  public static void main(String[] args) throws Exception {
+    if(args.length != 3) {
+      System.out.println("Usage: RandomData <schemafile> <outputfile> <count>");
+      System.exit(-1);
+    }
+    Schema sch = Schema.parse(new File(args[0]));
+    DataFileWriter<Object> writer =
+      new DataFileWriter<Object>(new GenericDatumWriter<Object>())
+      .create(sch, new File(args[1]));
+    try {
+      for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) {
+        writer.append(datum);
+      }
+    } finally {
+      writer.close();
+    }
+  }
+}
Propchange: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni.avro;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.EOFException;
+import java.io.InputStream;
+import java.io.FileInputStream;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.trevni.ValueType;
+import org.apache.trevni.ColumnMetaData;
+import org.apache.trevni.ColumnFileMetaData;
+
+import org.apache.avro.Schema;
+import org.apache.avro.io.Decoder;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.generic.GenericDatumReader;
+
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+public class TestCases {
+
+ private static final File DIR = new File("src/test/cases/");
+ private static final File FILE = new File("target", "case.trv");
+
+ @Test public void testCases() throws Exception {
+ for (File f : DIR.listFiles())
+ if (f.isDirectory() && !f.getName().startsWith("."))
+ runCase(f);
+ }
+
+ private void runCase(File dir) throws Exception {
+ Schema schema = Schema.parse(new File(dir, "input.avsc"));
+ List<Object> data = fromJson(schema, new File(dir, "input.json"));
+
+ // write full data
+ AvroColumnWriter<Object> writer =
+ new AvroColumnWriter<Object>(schema, new ColumnFileMetaData());
+ for (Object datum : data)
+ writer.write(datum);
+ writer.writeTo(FILE);
+
+ // test that the full schema reads correctly
+ checkRead(schema, data);
+
+ // test that sub-schemas read correctly
+ for (File f : dir.listFiles())
+ if (f.isDirectory() && !f.getName().startsWith(".")) {
+ Schema s = Schema.parse(new File(f, "sub.avsc"));
+ checkRead(s, fromJson(s, new File(f, "sub.json")));
+ }
+ }
+
+ private void checkRead(Schema s, List<Object> data) throws Exception {
+ AvroColumnReader<Object> reader =
+ new AvroColumnReader<Object>(new AvroColumnReader.Params(FILE)
+ .setSchema(s));
+ try {
+ for (Object datum : data)
+ assertEquals(datum, reader.next());
+ } finally {
+ reader.close();
+ }
+ }
+
+ private List<Object> fromJson(Schema schema, File file) throws Exception {
+ InputStream in = new FileInputStream(file);
+ List<Object> data = new ArrayList<Object>();
+ try {
+ DatumReader reader = new GenericDatumReader(schema);
+ Decoder decoder = DecoderFactory.get().jsonDecoder(schema, in);
+ while (true)
+ data.add(reader.read(null, decoder));
+ } catch (EOFException e) {
+ } finally {
+ in.close();
+ }
+ return data;
+ }
+
+}
Propchange: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestCases.java
------------------------------------------------------------------------------
svn:eol-style = native