You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2012/09/11 23:35:59 UTC
svn commit: r1383626 [3/3] - in /avro/trunk: ./ doc/src/content/xdocs/
lang/java/ lang/java/tools/
lang/java/tools/src/main/java/org/apache/avro/tool/ lang/java/trevni/
lang/java/trevni/avro/ lang/java/trevni/avro/src/
lang/java/trevni/avro/src/main/ l...
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * An {@link Input} backed with data in a byte array.
+ *
+ * <p>Positions passed to {@link #read(long, byte[], int, int)} and the value
+ * returned by {@link #length()} are indexes into the backing array (the
+ * inherited {@code pos} and {@code count} fields); they are not relative to
+ * the start of a wrapped {@link ByteBuffer}.
+ */
+public class InputBytes extends ByteArrayInputStream implements Input {
+
+  /** Construct for the given bytes. */
+  public InputBytes(byte[] data) { super(data); }
+
+  /**
+   * Construct for the remaining bytes of the given buffer.  The buffer's
+   * backing array is used directly, without copying.
+   *
+   * @param data an array-backed buffer; {@link ByteBuffer#array()} throws
+   *   if the buffer has no accessible backing array
+   */
+  public InputBytes(ByteBuffer data) {
+    // ByteArrayInputStream takes (buf, offset, length): the offset has to
+    // include the buffer's arrayOffset, and the length is the number of
+    // remaining bytes rather than the buffer's limit.
+    super(data.array(), data.arrayOffset() + data.position(), data.remaining());
+  }
+
+  /** Return the index one past the last readable byte in the backing array. */
+  @Override
+  public long length() throws IOException { return this.count; }
+
+  /**
+   * Move this stream to {@code pos}, then read up to {@code len} bytes into
+   * {@code b} starting at {@code start}.
+   *
+   * @param pos index into the backing array at which to start reading
+   * @return the number of bytes read, or -1 if {@code pos} is at or past the
+   *   end of the data
+   * @throws IOException if {@code pos} is negative or does not fit in an int
+   */
+  @Override
+  public synchronized int read(long pos, byte[] b, int start, int len)
+    throws IOException {
+    // Reject values that the int cast below would otherwise silently wrap.
+    if (pos < 0 || pos > Integer.MAX_VALUE)
+      throw new IOException("Invalid position: " + pos);
+    this.pos = (int)pos;
+    return read(b, start, len);
+  }
+
+  /** Return the backing array itself, not a copy. */
+  byte[] getBuffer() { return buf; }
+}
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.nio.channels.FileChannel;
+import java.nio.ByteBuffer;
+import java.io.IOException;
+
+/**
+ * An {@link Input} for files.
+ *
+ * <p>All operations are delegated to a {@link FileChannel} obtained from a
+ * {@link FileInputStream} opened on the file.
+ */
+public class InputFile implements Input {
+
+  private FileChannel channel;
+
+  /** Construct for the given file. */
+  public InputFile(File file) throws IOException {
+    FileInputStream stream = new FileInputStream(file);
+    this.channel = stream.getChannel();
+  }
+
+  /** Return the current size of the underlying channel. */
+  @Override
+  public long length() throws IOException {
+    return channel.size();
+  }
+
+  /**
+   * Read from the channel at the given absolute position into
+   * {@code b[start .. start+len)}, returning whatever the channel's
+   * positional read returns.
+   */
+  @Override
+  public int read(long position, byte[] b, int start, int len)
+    throws IOException {
+    ByteBuffer target = ByteBuffer.wrap(b, start, len);
+    return channel.read(target, position);
+  }
+
+  /** Close the underlying channel. */
+  @Override
+  public void close() throws IOException {
+    channel.close();
+  }
+
+}
+
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.Map;
+import java.util.LinkedHashMap;
+
+/**
+ * Base class for metadata.
+ *
+ * <p>Metadata is an insertion-ordered map from String keys to binary values.
+ * Keys starting with {@link #RESERVED_KEY_PREFIX} are reserved and cannot be
+ * set through the public {@code set} methods.  The setters return this
+ * instance, typed as {@code T}, so that calls can be chained.
+ */
+public class MetaData<T extends MetaData> extends LinkedHashMap<String,byte[]> {
+
+  static final String RESERVED_KEY_PREFIX = "trevni.";
+
+  static final String CODEC_KEY = RESERVED_KEY_PREFIX + "codec";
+  static final String CHECKSUM_KEY = RESERVED_KEY_PREFIX + "checksum";
+
+  public static final Charset UTF8 = Charset.forName("UTF-8");
+
+  // Used only by toString(): maps every byte to exactly one char, so
+  // arbitrary binary values can be rendered without decoding errors.
+  private static final Charset LATIN1 = Charset.forName("ISO-8859-1");
+
+  private MetaData<?> defaults;
+
+  /** Set the metadata consulted by {@link #getString} when a key is absent. */
+  void setDefaults(MetaData defaults) { this.defaults = defaults; }
+
+  /** Return this instance as {@code T}; keeps the unchecked cast in one place. */
+  @SuppressWarnings("unchecked")
+  private T self() { return (T)this; }
+
+  /** Return the compression codec name. */
+  public String getCodec() { return getString(CODEC_KEY); }
+
+  /** Set the compression codec name. */
+  public T setCodec(String codec) {
+    setReserved(CODEC_KEY, codec);
+    return self();
+  }
+
+  /** Return the checksum algorithm name. */
+  public String getChecksum() { return getString(CHECKSUM_KEY); }
+
+  /** Set the checksum algorithm name. */
+  public T setChecksum(String checksum) {
+    setReserved(CHECKSUM_KEY, checksum);
+    return self();
+  }
+
+  /**
+   * Return the value of a metadata property as a String, decoded as UTF-8.
+   * When this map has no value for the key, the defaults (if any) are
+   * consulted.
+   *
+   * @return the value, or null if neither this map nor the defaults have one
+   */
+  public String getString(String key) {
+    byte[] value = get(key);
+    if (value == null && defaults != null)
+      value = defaults.get(key);
+    if (value == null)
+      return null;
+    return new String(value, UTF8);
+  }
+
+  /**
+   * Return the value of a metadata property as a long.
+   *
+   * @throws NumberFormatException if the property is absent or is not a
+   *   parsable long
+   */
+  public long getLong(String key) {
+    return Long.parseLong(getString(key));
+  }
+
+  /**
+   * Return true iff a key has any value, false if it is not present.
+   * Only this map is checked; the defaults are not consulted.
+   */
+  public boolean getBoolean(String key) {
+    return get(key) != null;
+  }
+
+  /**
+   * Set a metadata property to a binary value.
+   *
+   * @throws TrevniRuntimeException if the key is reserved
+   */
+  public T set(String key, byte[] value) {
+    if (isReserved(key)) {
+      throw new TrevniRuntimeException("Cannot set reserved key: " + key);
+    }
+    put(key, value);
+    return self();
+  }
+
+  /** Test if a metadata key is reserved. */
+  public static boolean isReserved(String key) {
+    return key.startsWith(RESERVED_KEY_PREFIX);
+  }
+
+  /** Set a metadata property to a String value, stored as UTF-8. */
+  public T set(String key, String value) {
+    return set(key, value.getBytes(UTF8));
+  }
+
+  /** Set a property without the reserved-key check, stored as UTF-8. */
+  T setReserved(String key, String value) {
+    put(key, value.getBytes(UTF8));
+    return self();
+  }
+
+  /**
+   * Set a boolean property without the reserved-key check.  True is stored
+   * as an empty value; false removes the key, matching {@link #getBoolean}.
+   */
+  T setReservedBoolean(String key, boolean value) {
+    if (value)
+      setReserved(key, "");
+    else
+      remove(key);
+    return self();
+  }
+
+  /** Set a metadata property to a long value, stored as its decimal text. */
+  public T set(String key, long value) {
+    return set(key, Long.toString(value));
+  }
+
+  /** Write the entry count, then each key and value in iteration order. */
+  void write(OutputBuffer out) throws IOException {
+    out.writeInt(size());
+    for (Map.Entry<String,byte[]> e : entrySet()) {
+      out.writeString(e.getKey());
+      out.writeBytes(e.getValue());
+    }
+  }
+
+  /** Read an entry count, then that many key/value pairs, into metaData. */
+  static void read(InputBuffer in, MetaData<?> metaData) throws IOException {
+    int size = in.readInt();
+    for (int i = 0; i < size; i++)
+      metaData.put(in.readString(), in.readBytes());
+  }
+
+  /** Render as <code>{ key=value ... }</code>, with values shown as ISO-8859-1. */
+  @Override public String toString() {
+    // StringBuilder: the buffer never leaves this method, so StringBuffer's
+    // synchronization is unnecessary.  Passing a Charset rather than a
+    // charset name avoids a checked exception that could never be thrown.
+    StringBuilder buffer = new StringBuilder();
+    buffer.append("{ ");
+    for (Map.Entry<String,byte[]> e : entrySet()) {
+      buffer.append(e.getKey());
+      buffer.append("=");
+      buffer.append(new String(e.getValue(), LATIN1));
+      buffer.append(" ");
+    }
+    buffer.append("}");
+    return buffer.toString();
+  }
+
+}
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Implements "null" (empty) checksum: the reported size is zero and the
+ * computed checksum is an empty buffer.
+ */
+final class NullChecksum extends Checksum {
+
+  /** Always returns zero. */
+  @Override
+  public int size() {
+    return 0;
+  }
+
+  /** Ignores {@code data} and returns a newly allocated empty buffer. */
+  @Override
+  public ByteBuffer compute(ByteBuffer data) {
+    final int noBytes = 0;
+    return ByteBuffer.allocate(noBytes);
+  }
+
+}
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Implements "null" (pass through) codec: both directions hand back the
+ * very buffer they were given, without copying.
+ */
+final class NullCodec extends Codec {
+
+  /** Returns {@code buffer} itself, unchanged. */
+  @Override
+  ByteBuffer compress(ByteBuffer buffer) throws IOException {
+    final ByteBuffer unchanged = buffer;
+    return unchanged;
+  }
+
+  /** Returns {@code data} itself, unchanged. */
+  @Override
+  ByteBuffer decompress(ByteBuffer data) throws IOException {
+    final ByteBuffer unchanged = data;
+    return unchanged;
+  }
+
+}
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,296 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.io.ByteArrayOutputStream;
+import java.nio.charset.Charset;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+/**
+ * Used to write values.
+ *
+ * <p>Ints and longs are written zig-zag encoded in a variable number of
+ * bytes, seven bits per byte with the high bit marking continuation.
+ * Fixed-width values are written little-endian.  Strings and byte sequences
+ * are written as a variable-length int length followed by the bytes.  The
+ * static {@code size} methods return the number of bytes the matching
+ * {@code write} method appends.
+ */
+class OutputBuffer extends ByteArrayOutputStream {
+  static final int BLOCK_SIZE = 64 * 1024;
+
+  /** Construct with capacity for one block plus a quarter block of slack. */
+  public OutputBuffer() {
+    // Parenthesized on purpose: '+' binds tighter than '>>', so the
+    // unparenthesized form is (BLOCK_SIZE + BLOCK_SIZE) >> 2, i.e. half a
+    // block, which is less than the isFull() threshold.
+    super(BLOCK_SIZE + (BLOCK_SIZE >> 2));
+  }
+
+  /** Return true once at least {@link #BLOCK_SIZE} bytes have been written. */
+  public boolean isFull() { return size() >= BLOCK_SIZE; }
+
+  /** Wrap the bytes written so far, without copying. */
+  public ByteBuffer asByteBuffer() { return ByteBuffer.wrap(buf, 0, count); }
+
+  /**
+   * Write a value of the given type.  Nothing is written for NULL.
+   *
+   * @throws ClassCastException if the value's class does not match the type
+   * @throws TrevniRuntimeException if the type is not handled
+   */
+  public void writeValue(Object value, ValueType type)
+    throws IOException {
+    switch (type) {
+    case NULL:
+                                              break;
+    case INT:
+      writeInt((Integer)value);               break;
+    case LONG:
+      writeLong((Long)value);                 break;
+    case FIXED32:
+      writeFixed32((Integer)value);           break;
+    case FIXED64:
+      writeFixed64((Long)value);              break;
+    case FLOAT:
+      writeFloat((Float)value);               break;
+    case DOUBLE:
+      writeDouble((Double)value);             break;
+    case STRING:
+      writeString((String)value);             break;
+    case BYTES:
+      if (value instanceof ByteBuffer)
+        writeBytes((ByteBuffer)value);
+      else
+        writeBytes((byte[])value);
+      break;
+    default:
+      throw new TrevniRuntimeException("Unknown value type: "+type);
+    }
+  }
+
+  private static final Charset UTF8 = Charset.forName("UTF-8");
+
+  /** Write the UTF-8 encoding of a string, preceded by its byte length. */
+  public void writeString(String string) throws IOException {
+    byte[] bytes = string.getBytes(UTF8);
+    writeInt(bytes.length);
+    write(bytes, 0, bytes.length);
+  }
+
+  /**
+   * Write the bytes between the buffer's position and limit, preceded by
+   * their count.  The buffer must be array-backed; its position is not
+   * changed.
+   */
+  public void writeBytes(ByteBuffer bytes) throws IOException {
+    int pos = bytes.position();
+    int start = bytes.arrayOffset() + pos;
+    int len = bytes.limit() - pos;
+    writeBytes(bytes.array(), start, len);
+  }
+
+  /** Write all of the given bytes, preceded by their count. */
+  public void writeBytes(byte[] bytes) throws IOException {
+    writeBytes(bytes, 0, bytes.length);
+  }
+
+  /** Write {@code len} bytes starting at {@code start}, preceded by {@code len}. */
+  public void writeBytes(byte[] bytes, int start, int len) throws IOException {
+    writeInt(len);
+    write(bytes, start, len);
+  }
+
+  /** Write a float as the four little-endian bytes of its raw bits. */
+  public void writeFloat(float f) throws IOException {
+    writeFixed32(Float.floatToRawIntBits(f));
+  }
+
+  /** Write a double as the eight little-endian bytes of its raw bits. */
+  public void writeDouble(double d) throws IOException {
+    writeFixed64(Double.doubleToRawLongBits(d));
+  }
+
+  /** Write an int as four bytes, least significant byte first. */
+  public void writeFixed32(int i) throws IOException {
+    ensure(4);
+    buf[count  ] = (byte)((i        ) & 0xFF);
+    buf[count+1] = (byte)((i >>>  8) & 0xFF);
+    buf[count+2] = (byte)((i >>> 16) & 0xFF);
+    buf[count+3] = (byte)((i >>> 24) & 0xFF);
+    count += 4;
+  }
+
+  /** Write a long as eight bytes, least significant byte first. */
+  public void writeFixed64(long l) throws IOException {
+    ensure(8);
+    // The int casts already keep just the low 32 bits; no mask is needed.
+    int first = (int)l;            // low-order half
+    int second = (int)(l >>> 32);  // high-order half
+    buf[count  ] = (byte)((first         ) & 0xFF);
+    buf[count+1] = (byte)((first  >>>  8) & 0xFF);
+    buf[count+2] = (byte)((first  >>> 16) & 0xFF);
+    buf[count+3] = (byte)((first  >>> 24) & 0xFF);
+    buf[count+4] = (byte)((second        ) & 0xFF);
+    buf[count+5] = (byte)((second >>>  8) & 0xFF);
+    buf[count+6] = (byte)((second >>> 16) & 0xFF);
+    buf[count+7] = (byte)((second >>> 24) & 0xFF);
+    count += 8;
+  }
+
+  /** Write an int in variable-length zig-zag form, using one to five bytes. */
+  public void writeInt(int n) throws IOException {
+    ensure(5);
+    n = (n << 1) ^ (n >> 31);                     // move sign to low-order bit
+    // Unrolled: after the first unsigned shift n is non-negative, so the
+    // signed comparisons below are safe.
+    if ((n & ~0x7F) != 0) {
+      buf[count++] = (byte)((n | 0x80) & 0xFF);
+      n >>>= 7;
+      if (n > 0x7F) {
+        buf[count++] = (byte)((n | 0x80) & 0xFF);
+        n >>>= 7;
+        if (n > 0x7F) {
+          buf[count++] = (byte)((n | 0x80) & 0xFF);
+          n >>>= 7;
+          if (n > 0x7F) {
+            buf[count++] = (byte)((n | 0x80) & 0xFF);
+            n >>>= 7;
+          }
+        }
+      }
+    }
+    buf[count++] = (byte) n;
+  }
+
+  /** Write a long in variable-length zig-zag form, using one to ten bytes. */
+  public void writeLong(long n) throws IOException {
+    ensure(10);
+    n = (n << 1) ^ (n >> 63);                     // move sign to low-order bit
+    // Unrolled: after the first unsigned shift n is non-negative, so the
+    // signed comparisons below are safe.
+    if ((n & ~0x7FL) != 0) {
+      buf[count++] = (byte)((n | 0x80) & 0xFF);
+      n >>>= 7;
+      if (n > 0x7F) {
+        buf[count++] = (byte)((n | 0x80) & 0xFF);
+        n >>>= 7;
+        if (n > 0x7F) {
+          buf[count++] = (byte)((n | 0x80) & 0xFF);
+          n >>>= 7;
+          if (n > 0x7F) {
+            buf[count++] = (byte)((n | 0x80) & 0xFF);
+            n >>>= 7;
+            if (n > 0x7F) {
+              buf[count++] = (byte)((n | 0x80) & 0xFF);
+              n >>>= 7;
+              if (n > 0x7F) {
+                buf[count++] = (byte)((n | 0x80) & 0xFF);
+                n >>>= 7;
+                if (n > 0x7F) {
+                  buf[count++] = (byte)((n | 0x80) & 0xFF);
+                  n >>>= 7;
+                  if (n > 0x7F) {
+                    buf[count++] = (byte)((n | 0x80) & 0xFF);
+                    n >>>= 7;
+                    if (n > 0x7F) {
+                      buf[count++] = (byte)((n | 0x80) & 0xFF);
+                      n >>>= 7;
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    buf[count++] = (byte) n;
+  }
+
+  /** Grow the buffer, at least doubling it, so that n more bytes fit. */
+  private void ensure(int n) {
+    if (count + n > buf.length)
+      buf = Arrays.copyOf(buf, Math.max(buf.length << 1, count + n));
+  }
+
+  /**
+   * Return the number of bytes {@link #writeValue} appends for a value.
+   *
+   * @throws ClassCastException if the value's class does not match the type
+   * @throws TrevniRuntimeException if the type is not handled
+   */
+  public static int size(Object value, ValueType type) {
+    switch (type) {
+    case NULL:
+      return 0;
+    case INT:
+      return size((Integer)value);
+    case LONG:
+      return size((Long)value);
+    case FIXED32:
+    case FLOAT:
+      return 4;
+    case FIXED64:
+    case DOUBLE:
+      return 8;
+    case STRING:
+      return size((String)value);
+    case BYTES:
+      if (value instanceof ByteBuffer)
+        return size((ByteBuffer)value);
+      return size((byte[])value);
+    default:
+      throw new TrevniRuntimeException("Unknown value type: "+type);
+    }
+  }
+
+  /** Return the number of bytes {@link #writeInt} appends for n (1 to 5). */
+  public static int size(int n) {
+    n = (n << 1) ^ (n >> 31);                     // move sign to low-order bit
+    // Count 7-bit groups with the same unsigned test writeInt uses.  A
+    // signed comparison would report 1 whenever the zig-zag value has its
+    // top bit set (e.g. for Integer.MIN_VALUE).
+    int bytes = 1;
+    while ((n & ~0x7F) != 0) {
+      bytes++;
+      n >>>= 7;
+    }
+    return bytes;
+  }
+
+  /** Return the number of bytes {@link #writeLong} appends for n (1 to 10). */
+  public static int size(long n) {
+    n = (n << 1) ^ (n >> 63);                     // move sign to low-order bit
+    // Count 7-bit groups with the same unsigned test writeLong uses.  Int
+    // thresholds such as 1<<7*5 cannot be used here: an int shift distance
+    // is taken modulo 32, so they do not mark the 5..9 byte boundaries.
+    int bytes = 1;
+    while ((n & ~0x7FL) != 0) {
+      bytes++;
+      n >>>= 7;
+    }
+    return bytes;
+  }
+
+  /** Return the number of bytes {@link #writeBytes(ByteBuffer)} appends. */
+  public static int size(ByteBuffer bytes) {
+    int length = bytes.remaining();
+    return size(length) + length;
+  }
+
+  /** Return the number of bytes {@link #writeBytes(byte[])} appends. */
+  public static int size(byte[] bytes) {
+    int length = bytes.length;
+    return size(length) + length;
+  }
+
+  /** Return the size of a string: its UTF-8 length plus the length prefix. */
+  public static int size(String string) {
+    int length = utf8Length(string);
+    return size(length) + length;
+  }
+
+  /** Compute the UTF-8 encoded length of a string without encoding it. */
+  private static int utf8Length(String string) {
+    int stringLength = string.length();
+    int utf8Length = 0;
+    for (int i = 0; i < stringLength; i++) {
+      char c = string.charAt(i);
+      int p = c;                                  // code point
+      if (Character.isHighSurrogate(c)            // surrogate pair
+          && i != stringLength-1
+          && Character.isLowSurrogate(string.charAt(i+1))) {
+        p = string.codePointAt(i);
+        i++;
+      }
+      // An unpaired surrogate is counted as three bytes here.
+      if (p <= 0x007F) {
+        utf8Length += 1;
+      } else if (p <= 0x07FF) {
+        utf8Length += 2;
+      } else if (p <= 0x0FFFF) {
+        utf8Length += 3;
+      } else {
+        // Code points end at 0x10FFFF, so nothing needs more than 4 bytes.
+        utf8Length += 4;
+      }
+    }
+    return utf8Length;
+  }
+
+}
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import org.xerial.snappy.Snappy;
+
+/**
+ * Implements <a href="http://code.google.com/p/snappy/">Snappy</a> codec.
+ *
+ * <p>Input buffers must be array-backed.  The bytes between the input's
+ * position and limit are processed; the input's position is not changed.
+ * The result is a newly allocated buffer positioned at zero whose limit is
+ * the size Snappy reported.
+ */
+final class SnappyCodec extends Codec {
+
+  /** Compress the remaining bytes of {@code in} into a new buffer. */
+  @Override ByteBuffer compress(ByteBuffer in) throws IOException {
+    // position() is relative to arrayOffset(), which is non-zero for
+    // sliced buffers, so both are needed to index the backing array.
+    int offset = in.arrayOffset() + in.position();
+    ByteBuffer out =
+      ByteBuffer.allocate(Snappy.maxCompressedLength(in.remaining()));
+    // out was just allocated, so its data starts at index 0 of its array.
+    int size = Snappy.compress(in.array(), offset, in.remaining(),
+                               out.array(), 0);
+    out.limit(size);
+    return out;
+  }
+
+  /** Decompress the remaining bytes of {@code in} into a new buffer. */
+  @Override ByteBuffer decompress(ByteBuffer in) throws IOException {
+    // See compress(): the array index must include arrayOffset().
+    int offset = in.arrayOffset() + in.position();
+    ByteBuffer out = ByteBuffer.allocate
+      (Snappy.uncompressedLength(in.array(), offset, in.remaining()));
+    int size = Snappy.uncompress(in.array(), offset, in.remaining(),
+                                 out.array(), 0);
+    out.limit(size);
+    return out;
+  }
+
+}
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni;
+
+/**
+ * Base runtime exception thrown by Trevni.  Each constructor forwards its
+ * arguments unchanged to the matching {@link RuntimeException} constructor.
+ */
+public class TrevniRuntimeException extends RuntimeException {
+
+  /** Construct with a detail message and no cause. */
+  public TrevniRuntimeException(String message) {
+    super(message);
+  }
+
+  /** Construct with a cause and no explicit detail message. */
+  public TrevniRuntimeException(Throwable cause) {
+    super(cause);
+  }
+
+  /** Construct with both a detail message and a cause. */
+  public TrevniRuntimeException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+}
+
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+/**
+ * The datatypes that may be stored in a column.
+ *
+ * <p>Each type has a name, which is its constant name in lower case.
+ * Conversions between the two use a fixed locale so that names do not
+ * depend on the JVM's default locale (under a Turkish default locale, for
+ * example, "INT" would otherwise lower-case to a name with a dotless i).
+ */
+public enum ValueType {
+  NULL, INT, LONG, FIXED32, FIXED64, FLOAT, DOUBLE, STRING, BYTES;
+
+  private final String name;
+
+  private ValueType() {
+    this.name = this.name().toLowerCase(java.util.Locale.ENGLISH);
+  }
+
+  /** Return the name of this type. */
+  public String getName() { return name; }
+
+  /**
+   * Return a type given its name, ignoring case.
+   *
+   * @throws IllegalArgumentException if no type has the given name
+   * @throws NullPointerException if name is null
+   */
+  public static ValueType forName(String name) {
+    return valueOf(name.toUpperCase(java.util.Locale.ENGLISH));
+  }
+
+}
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html Tue Sep 11 21:35:56 2012
@@ -0,0 +1,23 @@
+<html>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<body>
+A column file format.
+</body>
+</html>
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/main/java/overview.html
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/overview.html?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/overview.html (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/overview.html Tue Sep 11 21:35:56 2012
@@ -0,0 +1,88 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+ <title>Avro</title>
+</head>
+<body>Avro is a data serialization system.
+
+ <h2>Overview</h2>
+
+ <p>Avro provides:
+ <ul>
+ <li>Rich data structures.
+ <li>A compact, fast, binary data format.
+ <li>A container file, to store persistent data.
+ <li>Remote procedure call (RPC).
+ <li>Simple integration with dynamic languages. Code generation
+ is not required to read or write data files nor to use or
+ implement RPC protocols. Code generation is an optional
+ optimization, only worth implementing for statically typed
+ languages.
+ </ul>
+
+ <h2>Schemas</h2>
+
+ <p>Avro relies on <i>{@link org.apache.avro.Schema schemas}</i>.
+ When Avro data is read, the schema used when writing it is always
+ present. This permits each datum to be written with no per-value
+ overheads, making serialization both fast and small. This also
+ facilitates use with dynamic, scripting languages, since data,
+ together with its schema, is fully self-describing.
+
+ <p>When Avro data is stored in a {@link
+ org.apache.avro.file.DataFileWriter file}, its schema is stored with
+ it, so that files may be processed later by any program. If the
+ program reading the data expects a different schema this can be
+ easily resolved, since both schemas are present.
+
+ <p>When Avro is used in {@link org.apache.avro.ipc RPC}, the client
+ and server exchange schemas in the connection handshake. (This
+ can be optimized so that, for most calls, no schemas are actually
+ transmitted.) Since client and server both have the other's
+ full schema, correspondence between same named fields, missing
+ fields, extra fields, etc. can all be easily resolved.
+
+ <p>Avro schemas are defined
+ with <a href="http://www.json.org/">JSON</a>. This facilitates
+ implementation in languages that already have JSON libraries.
+
+ <h2>Comparison with other systems</h2>
+
+ Avro provides functionality similar to systems such
+ as <a href="http://incubator.apache.org/thrift/">Thrift</a>,
+ <a href="http://code.google.com/protobuf/">Protocol Buffers</a>,
+ etc. Avro differs from these systems in the following fundamental
+ aspects.
+ <ul>
+ <li><i>Dynamic typing</i>: Avro does not require that code be
+ generated. Data is always accompanied by a schema that permits
+ full processing of that data without code generation, static
+ datatypes, etc. This facilitates construction of generic
+ data-processing systems and languages.
+ <li><i>Untagged data</i>: Since the schema is present when data is
+ read, considerably less type information need be encoded with
+ data, resulting in smaller serialization size.</li>
+ <li><i>No manually-assigned field IDs</i>: When a schema changes,
+ both the old and new schema are always present when processing
+ data, so differences may be resolved symbolically, using field
+ names.
+ </ul>
+
+</body>
+</html>
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,266 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.Collection;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(value = Parameterized.class)
+public class TestColumnFile {
+
+ private static final File FILE = new File("target", "test.trv");
+ private static final int COUNT = 1024*64;
+
+ private String codec;
+ private String checksum;
+
+ public TestColumnFile(String codec, String checksum) {
+ this.codec = codec;
+ this.checksum = checksum;
+ }
+
+ @Parameters public static Collection<Object[]> codecs() {
+ Object[][] data = new Object[][] {{"null", "null"},
+ {"snappy", "crc32"},
+ {"deflate", "crc32"}};
+ return Arrays.asList(data);
+ }
+
+ private ColumnFileMetaData createFileMeta() {
+ return new ColumnFileMetaData()
+ .setCodec(codec)
+ .setChecksum(checksum);
+ }
+
+ @Test public void testEmptyFile() throws Exception {
+ FILE.delete();
+ ColumnFileWriter out = new ColumnFileWriter(createFileMeta());
+ out.writeTo(FILE);
+ ColumnFileReader in = new ColumnFileReader(FILE);
+ Assert.assertEquals(0, in.getRowCount());
+ Assert.assertEquals(0, in.getColumnCount());
+ in.close();
+ }
+
+ @Test public void testEmptyColumn() throws Exception {
+ FILE.delete();
+ ColumnFileWriter out =
+ new ColumnFileWriter(createFileMeta(),
+ new ColumnMetaData("test", ValueType.INT));
+ out.writeTo(FILE);
+ ColumnFileReader in = new ColumnFileReader(FILE);
+ Assert.assertEquals(0, in.getRowCount());
+ Assert.assertEquals(1, in.getColumnCount());
+ ColumnValues<Integer> values = in.getValues("test");
+ for (int i : values)
+ throw new Exception("no value should be found");
+ in.close();
+ }
+
+ @Test public void testInts() throws Exception {
+ FILE.delete();
+
+ ColumnFileWriter out =
+ new ColumnFileWriter(createFileMeta(),
+ new ColumnMetaData("test", ValueType.INT));
+ Random random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ out.writeRow(TestUtil.randomLength(random));
+ out.writeTo(FILE);
+
+ random = TestUtil.createRandom();
+ ColumnFileReader in = new ColumnFileReader(FILE);
+ Assert.assertEquals(COUNT, in.getRowCount());
+ Assert.assertEquals(1, in.getColumnCount());
+ Iterator<Integer> i = in.getValues("test");
+ int count = 0;
+ while (i.hasNext()) {
+ Assert.assertEquals(TestUtil.randomLength(random), (int)i.next());
+ count++;
+ }
+ Assert.assertEquals(COUNT, count);
+ }
+
+ @Test public void testLongs() throws Exception {
+ FILE.delete();
+
+ ColumnFileWriter out =
+ new ColumnFileWriter(createFileMeta(),
+ new ColumnMetaData("test", ValueType.LONG));
+ Random random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ out.writeRow(random.nextLong());
+ out.writeTo(FILE);
+
+ random = TestUtil.createRandom();
+ ColumnFileReader in = new ColumnFileReader(FILE);
+ Assert.assertEquals(COUNT, in.getRowCount());
+ Assert.assertEquals(1, in.getColumnCount());
+ Iterator<Long> i = in.getValues("test");
+ int count = 0;
+ while (i.hasNext()) {
+ Assert.assertEquals(random.nextLong(), (long)i.next());
+ count++;
+ }
+ Assert.assertEquals(COUNT, count);
+ }
+
+ @Test public void testStrings() throws Exception {
+ FILE.delete();
+
+ ColumnFileWriter out =
+ new ColumnFileWriter(createFileMeta(),
+ new ColumnMetaData("test", ValueType.STRING));
+ Random random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ out.writeRow(TestUtil.randomString(random));
+ out.writeTo(FILE);
+
+ random = TestUtil.createRandom();
+ ColumnFileReader in = new ColumnFileReader(FILE);
+ Assert.assertEquals(COUNT, in.getRowCount());
+ Assert.assertEquals(1, in.getColumnCount());
+ Iterator<String> i = in.getValues("test");
+ int count = 0;
+ while (i.hasNext()) {
+ Assert.assertEquals(TestUtil.randomString(random), i.next());
+ count++;
+ }
+ Assert.assertEquals(COUNT, count);
+ }
+
+ @Test public void testTwoColumn() throws Exception {
+ FILE.delete();
+ ColumnFileWriter out =
+ new ColumnFileWriter(createFileMeta(),
+ new ColumnMetaData("a", ValueType.FIXED32),
+ new ColumnMetaData("b", ValueType.STRING));
+ Random random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ out.writeRow(random.nextInt(), TestUtil.randomString(random));
+ out.writeTo(FILE);
+
+ random = TestUtil.createRandom();
+ ColumnFileReader in = new ColumnFileReader(FILE);
+ Assert.assertEquals(COUNT, in.getRowCount());
+ Assert.assertEquals(2, in.getColumnCount());
+ Iterator<String> i = in.getValues("a");
+ Iterator<String> j = in.getValues("b");
+ int count = 0;
+ while (i.hasNext() && j.hasNext()) {
+ Assert.assertEquals(random.nextInt(), i.next());
+ Assert.assertEquals(TestUtil.randomString(random), j.next());
+ count++;
+ }
+ Assert.assertEquals(COUNT, count);
+ }
+
+ @Test public void testSeekLongs() throws Exception {
+ FILE.delete();
+
+ ColumnFileWriter out =
+ new ColumnFileWriter(createFileMeta(),
+ new ColumnMetaData("test", ValueType.LONG));
+ Random random = TestUtil.createRandom();
+
+ int seekCount = COUNT/1024;
+ int[] seekRows = new int[seekCount];
+ Map<Integer,Integer> seekRowMap = new HashMap<Integer,Integer>(seekCount);
+ while (seekRowMap.size() < seekCount) {
+ int row = random.nextInt(COUNT);
+ if (!seekRowMap.containsKey(row)) {
+ seekRows[seekRowMap.size()] = row;
+ seekRowMap.put(row, seekRowMap.size());
+ }
+ }
+
+ Long[] seekValues = new Long[seekCount];
+ for (int i = 0; i < COUNT; i++) {
+ long l = random.nextLong();
+ out.writeRow(l);
+ if (seekRowMap.containsKey(i))
+ seekValues[seekRowMap.get(i)] = l;
+ }
+ out.writeTo(FILE);
+
+ ColumnFileReader in = new ColumnFileReader(FILE);
+ ColumnValues<Long> v = in.getValues("test");
+
+ for (int i = 0; i < seekCount; i++) {
+ v.seek(seekRows[i]);
+ Assert.assertEquals(seekValues[i], v.next());
+ }
+
+ }
+
+ @Test public void testSeekStrings() throws Exception {
+ FILE.delete();
+
+ ColumnFileWriter out =
+ new ColumnFileWriter(createFileMeta(),
+ new ColumnMetaData("test", ValueType.STRING)
+ .hasIndexValues(true));
+
+ Random random = TestUtil.createRandom();
+
+ int seekCount = COUNT/1024;
+ Map<Integer,Integer> seekRowMap = new HashMap<Integer,Integer>(seekCount);
+ while (seekRowMap.size() < seekCount) {
+ int row = random.nextInt(COUNT);
+ if (!seekRowMap.containsKey(row))
+ seekRowMap.put(row, seekRowMap.size());
+ }
+
+ String[] values = new String[COUNT];
+ for (int i = 0; i < COUNT; i++)
+ values[i] = TestUtil.randomString(random);
+ Arrays.sort(values);
+
+ String[] seekValues = new String[seekCount];
+ for (int i = 0; i < COUNT; i++) {
+ out.writeRow(values[i]);
+ if (seekRowMap.containsKey(i))
+ seekValues[seekRowMap.get(i)] = values[i];
+ }
+ out.writeTo(FILE);
+
+ ColumnFileReader in = new ColumnFileReader(FILE);
+ ColumnValues<String> v = in.getValues("test");
+
+ for (int i = 0; i < seekCount; i++) {
+ v.seek(seekValues[i]);
+ Assert.assertEquals(seekValues[i], v.next());
+ }
+
+ }
+
+}
Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,233 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.util.Random;
+
+import java.io.ByteArrayOutputStream;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestIOBuffers {
+
+ private static final int COUNT = 1000;
+
+ @Test public void testEmpty() throws Exception {
+ OutputBuffer out = new OutputBuffer();
+ ByteArrayOutputStream temp = new ByteArrayOutputStream();
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ Assert.assertEquals(0, in.tell());
+ Assert.assertEquals(0, in.length());
+ }
+
+ @Test public void testZero() throws Exception {
+ Random random = TestUtil.createRandom();
+ OutputBuffer out = new OutputBuffer();
+ out.writeInt(0);
+ byte[] bytes = out.toByteArray();
+ Assert.assertEquals(1, bytes.length);
+ Assert.assertEquals(0, bytes[0]);
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ Assert.assertEquals(0, in.readInt());
+ }
+
+ @Test public void testInt() throws Exception {
+ Random random = TestUtil.createRandom();
+ OutputBuffer out = new OutputBuffer();
+ for (int i = 0; i < COUNT; i++)
+ out.writeInt(random.nextInt());
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ Assert.assertEquals(random.nextInt(), in.readInt());
+ }
+
+ @Test public void testLong() throws Exception {
+ Random random = TestUtil.createRandom();
+ OutputBuffer out = new OutputBuffer();
+ for (int i = 0; i < COUNT; i++)
+ out.writeLong(random.nextLong());
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ Assert.assertEquals(random.nextLong(), in.readLong());
+ }
+
+ @Test public void testFixed32() throws Exception {
+ Random random = TestUtil.createRandom();
+ OutputBuffer out = new OutputBuffer();
+ for (int i = 0; i < COUNT; i++)
+ out.writeFixed32(random.nextInt());
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ Assert.assertEquals(random.nextInt(), in.readFixed32());
+ }
+
+ @Test public void testFixed64() throws Exception {
+ Random random = TestUtil.createRandom();
+ OutputBuffer out = new OutputBuffer();
+ for (int i = 0; i < COUNT; i++)
+ out.writeFixed64(random.nextLong());
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ Assert.assertEquals(random.nextLong(), in.readFixed64());
+ }
+
+ @Test public void testFloat() throws Exception {
+ Random random = TestUtil.createRandom();
+ OutputBuffer out = new OutputBuffer();
+ for (int i = 0; i < COUNT; i++)
+ out.writeFloat(random.nextFloat());
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ Assert.assertEquals(random.nextFloat(), in.readFloat(), 0);
+ }
+
+ @Test public void testDouble() throws Exception {
+ OutputBuffer out = new OutputBuffer();
+ for (int i = 0; i < COUNT; i++)
+ out.writeDouble(Double.MIN_VALUE);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ for (int i = 0; i < COUNT; i++)
+ Assert.assertEquals(Double.MIN_VALUE, in.readDouble(), 0);
+ }
+
+ @Test public void testBytes() throws Exception {
+ Random random = TestUtil.createRandom();
+ OutputBuffer out = new OutputBuffer();
+ for (int i = 0; i < COUNT; i++)
+ out.writeBytes(TestUtil.randomBytes(random));
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ Assert.assertEquals(TestUtil.randomBytes(random), in.readBytes(null));
+ }
+
+ @Test public void testString() throws Exception {
+ Random random = TestUtil.createRandom();
+ OutputBuffer out = new OutputBuffer();
+ for (int i = 0; i < COUNT; i++)
+ out.writeString(TestUtil.randomString(random));
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ random = TestUtil.createRandom();
+ for (int i = 0; i < COUNT; i++)
+ Assert.assertEquals(TestUtil.randomString(random), in.readString());
+ }
+ @Test public void testSkipNull() throws Exception {
+ long sentinel = Long.MAX_VALUE;
+ OutputBuffer out = new OutputBuffer();
+ out.writeValue(null, ValueType.NULL);
+ out.writeLong(sentinel);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ in.skipValue(ValueType.NULL);
+ Assert.assertEquals(sentinel, in.readLong());
+ }
+ @Test public void testSkipInt() throws Exception {
+ long sentinel = Long.MAX_VALUE;
+ OutputBuffer out = new OutputBuffer();
+ out.writeValue(Integer.MAX_VALUE, ValueType.INT);
+ out.writeLong(sentinel);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ in.skipValue(ValueType.INT);
+ Assert.assertEquals(sentinel, in.readLong());
+ }
+ @Test public void testSkipLong() throws Exception {
+ long sentinel = Long.MAX_VALUE;
+ OutputBuffer out = new OutputBuffer();
+ out.writeValue(Long.MAX_VALUE, ValueType.LONG);
+ out.writeLong(sentinel);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ in.skipValue(ValueType.LONG);
+ Assert.assertEquals(sentinel, in.readLong());
+ }
+ @Test public void testSkipFixed32() throws Exception {
+ long sentinel = Long.MAX_VALUE;
+ OutputBuffer out = new OutputBuffer();
+ out.writeValue(Integer.MAX_VALUE, ValueType.FIXED32);
+ out.writeLong(sentinel);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ in.skipValue(ValueType.LONG);
+ Assert.assertEquals(sentinel, in.readLong());
+ }
+ @Test public void testSkipFixed64() throws Exception {
+ long sentinel = Long.MAX_VALUE;
+ OutputBuffer out = new OutputBuffer();
+ out.writeValue(Long.MAX_VALUE, ValueType.FIXED64);
+ out.writeLong(sentinel);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ in.skipValue(ValueType.LONG);
+ Assert.assertEquals(sentinel, in.readLong());
+ }
+ @Test public void testSkipFloat() throws Exception {
+ long sentinel = Long.MAX_VALUE;
+ OutputBuffer out = new OutputBuffer();
+ out.writeValue(Float.MAX_VALUE, ValueType.FLOAT);
+ out.writeLong(sentinel);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ in.skipValue(ValueType.FLOAT);
+ Assert.assertEquals(sentinel, in.readLong());
+ }
+ @Test public void testSkipDouble() throws Exception {
+ long sentinel = Long.MAX_VALUE;
+ OutputBuffer out = new OutputBuffer();
+ out.writeValue(Double.MAX_VALUE, ValueType.DOUBLE);
+ out.writeLong(sentinel);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ in.skipValue(ValueType.DOUBLE);
+ Assert.assertEquals(sentinel, in.readLong());
+ }
+ @Test public void testSkipString() throws Exception {
+ long sentinel = Long.MAX_VALUE;
+ OutputBuffer out = new OutputBuffer();
+ out.writeValue("trevni", ValueType.STRING);
+ out.writeLong(sentinel);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ in.skipValue(ValueType.STRING);
+ Assert.assertEquals(sentinel, in.readLong());
+ }
+ @Test public void testSkipBytes() throws Exception {
+ long sentinel = Long.MAX_VALUE;
+ OutputBuffer out = new OutputBuffer();
+ out.writeValue("trevni".getBytes(), ValueType.BYTES);
+ out.writeLong(sentinel);
+
+ InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+ in.skipValue(ValueType.BYTES);
+ Assert.assertEquals(sentinel, in.readLong());
+ }
+}
Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.Arrays;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestInputBytes {
+
+ private static final int SIZE = 1000;
+ private static final int COUNT = 100;
+
+ @Test public void testRandomReads() throws Exception {
+ Random random = new Random();
+ int length = random.nextInt(SIZE);
+ byte[] data = new byte[length];
+ random.nextBytes(data);
+
+ Input in = new InputBytes(data);
+
+ for (int i = 0; i < COUNT; i++) {
+ int p = random.nextInt(length);
+ int l = Math.min(random.nextInt(SIZE/10), length-p);
+ byte[] buffer = new byte[l];
+ in.read(p, buffer, 0, l);
+ Assert.assertArrayEquals(Arrays.copyOfRange(data, p, p+l), buffer);
+ }
+ }
+}
Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.util.Random;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestUtil {
+
+ private static long seed;
+ private static boolean seedSet;
+
+ /** Returns the random seed for this test run. By default uses the current
+ * time, but a test run can be replicated by specifying the "test.seed"
+ * system property. The seed is printed the first time it's accessed so that
+ * failures can be replicated if needed. */
+ public static long getRandomSeed() {
+ if (!seedSet) {
+ String configured = System.getProperty("test.seed");
+ if (configured != null)
+ seed = Long.valueOf(configured);
+ else
+ seed = System.currentTimeMillis();
+ System.out.println("test.seed="+seed);
+ seedSet = true;
+ }
+ return seed;
+ }
+
+ public static Random createRandom() {
+ return new Random(getRandomSeed());
+ }
+
+ public static ByteBuffer randomBytes(Random random) {
+ byte[] bytes = new byte[randomLength(random)];
+ random.nextBytes(bytes);
+ return ByteBuffer.wrap(bytes);
+ }
+
+ public static String randomString(Random random) {
+ int length = randomLength(random);
+ char[] chars = new char[length];
+ for (int i = 0; i < length; i++)
+ chars[i] = (char)('a'+random.nextInt('z'-'a'));
+ return new String(chars);
+ }
+
+ /** Returns [0-15] 15/16 times.
+ * Returns [0-255] 255/256 times.
+ * Returns [0-4095] 4095/4096 times.
+ * Returns [0-65535] every time. */
+ public static int randomLength(Random random) {
+ int n = random.nextInt();
+ if (n < 0) n = -n;
+ return n &
+ ((n & 0xF0000) != 0
+ ? 0xF
+ : ((n & 0xFF0000) != 0
+ ? 0xFF
+ : ((n & 0xFFF0000) != 0
+ ? 0xFFF
+ : 0xFFFF)));
+ }
+
+ @Test public void testRandomLength() {
+ long total = 0;
+ int count = 1024 * 1024;
+ int min = Short.MAX_VALUE;
+ int max = 0;
+ Random r = createRandom();
+ for (int i = 0; i < count; i++) {
+ int length = randomLength(r);
+ if (min > length) min = length;
+ if (max < length) max = length;
+ total += length;
+ }
+ Assert.assertEquals(0, min);
+ Assert.assertTrue(max > 1024 * 32);
+
+ float average = total / (float)count;
+ Assert.assertTrue(average > 16.0f);
+ Assert.assertTrue(average < 64.0f);
+
+ }
+
+}
Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: avro/trunk/lang/java/trevni/doc/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Sep 11 21:35:56 2012
@@ -0,0 +1 @@
+target
Added: avro/trunk/lang/java/trevni/doc/apt/spec.apt
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/doc/apt/spec.apt?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/doc/apt/spec.apt (added)
+++ avro/trunk/lang/java/trevni/doc/apt/spec.apt Tue Sep 11 21:35:56 2012
@@ -0,0 +1,467 @@
+~~ Licensed to the Apache Software Foundation (ASF) under one or more
+~~ contributor license agreements. See the NOTICE file distributed with
+~~ this work for additional information regarding copyright ownership.
+~~ The ASF licenses this file to You under the Apache License, Version 2.0
+~~ (the "License"); you may not use this file except in compliance with
+~~ the License. You may obtain a copy of the License at
+~~
+~~ http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License.
+ ---
+ Trevni: A Column File Format
+ ---
+
+Trevni: A Column File Format
+
+ Version 0.1
+
+ DRAFT
+
+ This document is the authoritative specification of a file format.
+ Its intent is to permit compatible, independent implementations that
+ read and/or write files in this format.
+
+Introduction
+
+ Data sets are often described as a <table> composed of <rows> and
+ <columns>. Each record in the dataset is considered a row, with
+ each field of the record occupying a different column. Writing
+ records to a file one-by-one as they are created results in a
+ <row-major> format, like Hadoop's SequenceFile or Avro data files.
+
+ In many cases higher query performance may be achieved if the data
+ is instead organized in a <column-major> format, where multiple
+ values of a given column are stored adjacently. This document
+ defines such a column-major file format for datasets.
+
+ To permit scalable, distributed query evaluation, datasets are
+ partitioned into row groups, containing distinct collections of
+ rows. Each row group is organized in column-major order, while row
+ groups form a row-major partitioning of the entire dataset.
+
+Rationale
+
+* Goals
+
+ The format is meant to satisfy the following goals:
+
+ [[1]] Maximize the size of row groups. Disc drives are used most
+ efficiently when sequentially accessing data. Consider a drive that
+ takes 10ms to seek and transfers at 100MB/second. If a 10-column
+ dataset whose values are all the same size is split into 10MB row
+ groups, then accessing a single column will require a sequence of
+ seek+1MB reads, for a cost of 20ms/MB processed. If the same
+ dataset is split into 100MB row groups then this drops to 11ms/MB
+ processed. This effect is exaggerated for datasets with larger
+ numbers of columns and with columns whose values are smaller than
+ average. So we'd prefer row groups that are 100MB or greater.
+
+ [[1]] Permit random access within a row group. Some queries will
+ first examine one column, and, only when certain relatively rare
+ criteria are met, examine other columns. Rather than iterating
+ through selected columns of the row-group in parallel, one might
+ iterate through one column and randomly access another. This is
+ called support for WHERE clauses, after the SQL operator of that
+ name.
+
+ [[1]] Minimize the number of files per dataset. HDFS is a primary
+ intended deployment platform for these files. The HDFS Namenode
+ requires memory for each file in the filesystem, thus for a format
+ to be HDFS-friendly it should strive to require the minimum number
+ of distinct files.
+
+ [[1]] Support co-location of columns within row-groups. Row groups
+ are the unit of parallel operation on a column dataset. For
+ efficient file i/o, the entirety of a row-group should ideally
+ reside on the host that is evaluating the query in order to avoid
+ network latencies and bottlenecks.
+
+ [[1]] Data integrity. The format should permit applications to
+ detect data corruption. Many file systems may prevent corruption,
+ but files may be moved between filesystems and be subject to
+ corruption at points in that process. It is best if the data in a
+ file can be validated independently.
+
+ [[1]] Extensibility. The format should permit applications to store
+ additional annotations about a dataset in the files, such as type
+ information, origin, etc. Some environments may have metadata
+ stores for such information, but not all do, and files might be
+ moved among systems with different metadata systems. The ability to
+ keep such information within the file simplifies the coordination of
+ such information.
+
+ [[1]] Minimal overhead. The column format should not make datasets
+ appreciably larger. Storage is a primary cost and a choice to use
+ this format should not require additional storage.
+
+ [[1]] Primary format. The column format should be usable as a
+ primary format for datasets, not as an auxiliary, accelerated
+ format. Applications that process a dataset in row-major order
+ should be able to easily consume column files and applications that
+ produce datasets in row-major order should be able to easily
+ generate column files.
+
+* Design
+
+ To meet these goals we propose the following design.
+
+ [[1]] Each row group is a separate file. All values of a column in
+ a file are written contiguously. This maximizes the row group size,
+ optimizing performance when querying few and small columns.
+
+ [[1]] Each file occupies a single HDFS block. A larger than normal
+ block size may be specified, e.g., ~1GB instead of the typical
+ ~100MB. This guarantees co-location and eliminates network use when
+ query processing can be co-located with the file. This also
+ moderates the memory impact on the HDFS Namenode since no small
+ files are written.
+
+ [[1]] Each column in a file is written as a sequence of ~64kB
+ compressed blocks. The sequence is prefixed by a table describing
+ all of the blocks in the column to permit random access within the
+ column.
+
+ [[1]] Application-specific metadata may be added at the file,
+ column, and block levels.
+
+ [[1]] Checksums are included with each block, providing data integrity.
+
+* Discussion
+
+ The use of a single block per file achieves the same effect as the
+ custom block placement policy described in the {{CIF}} paper,
+ while still permitting HDFS rebalancing and not increasing the
+ number of files in the namespace.
+
+Format Specification
+
+ This section formally describes the proposed column file format.
+
+* Data Model
+
+ We assume a simple data model, where a record is a set of named
+ fields, and the value of each field is a sequence of untyped bytes.
+ A type system may be layered on top of this, as specified in the
+ Type Mapping section below.
+
+* Primitive Values
+
+ We define the following primitive value types:
+
+ * Signed 64-bit <<long>> values are written using a variable-length
+zig-zag coding, where the high-order bit in each byte determines
+whether subsequent bytes are present. For example:
+
+*--------------*------*
+ decimal value | hex bytes
+*--------------*------*
+0 | 00
+*--------------*------*
+-1 | 01
+*--------------*------*
+1 | 02
+*--------------*------*
+...
+*--------------*------*
+-64 | 7f
+*--------------*------*
+64 | 80 01
+*--------------*------*
+...
+*--------------*------*
+
+ * <<bytes>> are encoded as a <long> followed by that many bytes of data.
+
+ * a <<string>> is encoded as a <long> followed by that many bytes of
+ UTF-8 encoded character data.
+
+ For example, the three-character string "foo" would be encoded as
+ the <long> value 3 (encoded as hex 06) followed by the UTF-8
+ encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): 06 66 6f 6f
+
+* Type Names
+
+ The following type names are used to describe column values:
+
+ * <<null>>, requires zero bytes. Sometimes used in array columns.
+
+ * <<int>>, like <long>, but restricted to 32-bit signed values
+
+ * <<long>> 64-bit signed values, represented as above
+
+ * <<fixed32>> 32-bit values stored as four bytes, little-endian.
+
+ * <<fixed64>> 64-bit values stored as eight bytes, little-endian.
+
+ * <<float>> 32-bit IEEE floating point value, little-endian
+
+ * <<double>> 64-bit IEEE floating point value, little-endian
+
+ * <<string>> as above
+
+ * <<bytes>> as above, may be used to encapsulate more complex objects
+
+ []
+
+ Type names are represented as <strings> (UTF-8 encoded, length-prefixed).
+
+* Metadata
+
+ <<Metadata>> consists of:
+
+ * A <long> indicating the number of metadata key/value pairs.
+
+ * For each pair, a <string> key and <bytes> value.
+
+ []
+
+ All metadata properties that start with "trevni." are reserved.
+
+** File Metadata
+
+ The following file metadata properties are defined:
+
+ * <<trevni.codec>> the name of the default compression codec used to
+ compress blocks, as a <string>. Implementations are required to
+ support the "null" codec. Optional. If absent, it is assumed to
+ be "null". Codecs are described in more detail below.
+
+ * <<trevni.checksum>> the name of the checksum algorithm used in this
+ file, as a <string>. Implementations are required to support the
+ "crc-32" checksum. Optional. If absent, it is assumed to be
+ "null". Checksums are described in more detail below.
+
+ []
+
+** Column Metadata
+
+ The following column metadata properties are defined:
+
+ * <<trevni.codec>> the name of the compression codec used to compress
+ the blocks of this column, as a <string>. Implementations are
+ required to support the "null" codec. Optional. If absent, it is
+ assumed to be "null". Codecs are described in more detail below.
+
+ * <<trevni.name>> the name of the column, as a <string>. Required.
+
+ * <<trevni.type>> the type of data in the column. One of the type names
+ above. Required.
+
+ * <<trevni.values>> if present, indicates that the initial value of each
+ block in this column will be stored in the block's descriptor.
+ Not permitted for array columns or columns that specify a parent.
+
+ * <<trevni.array>> if present, indicates that each row in this column
+ contains a sequence of values of the named type rather than just a
+ single value. An integer length precedes each sequence of values
+ indicating the count of values in the sequence.
+
+ * <<trevni.parent>> if present, the name of an <array> column whose
+ lengths are also used by this column. Thus values of this column
+ are sequences but no lengths are stored in this column.
+
+ []
+
+ For example, consider the following row, as JSON, where all values
+ are primitive types, but one has multiple values.
+
+---
+{"id": 566, "date": 23423234234,
+ "from": "foo@bar.com",
+ "to": ["bar@baz.com", "bang@foo.com"],
+ "content": "Hi!"}
+---
+
+ The columns for this might be specified as:
+
+---
+name=id type=int
+name=date type=long
+name=from type=string
+name=to type=string array=true
+name=content type=string
+---
+
+ If a row contains an array of records, e.g. "received" in the following:
+
+---
+{"id": 566, "date": 23423234234,
+ "from": "foo@bar.com",
+ "to": ["bar@baz.com", "bang@foo.com"],
+ "content": "Hi!",
+ "received": [{"date": 234234234234, "host": "192.168.0.1"},
+ {"date": 234234545645, "host": "192.168.0.2"}]
+}
+---
+
+ Then one can define a parent column followed by a column for each
+ field in the record, adding the following columns:
+
+---
+name=received type=null array=true
+name=date type=long parent=received
+name=host type=string parent=received
+---
+
+ If an array value itself contains an array, e.g. the "sigs" below:
+
+---
+{"id": 566, "date": 23423234234,
+ "from": "foo@bar.com",
+ "to": ["bar@baz.com", "bang@foo.com"],
+ "content": "Hi!",
+ "received": [{"date": 234234234234, "host": "192.168.0.1",
+ "sigs": [{"algo": "weak", "value": "0af345de"}]},
+ {"date": 234234545645, "host": "192.168.0.2",
+ "sigs": []}]
+}
+---
+
+ Then a parent column may be defined that itself has a parent column.
+
+---
+name=sigs type=null array=true parent=received
+name=algo type=string parent=sigs
+name=value type=string parent=sigs
+---
+
+** Block Metadata
+
+ No block metadata properties are currently defined.
+
+* File Format
+
+ A <<file>> consists of:
+
+ * A <file header>, followed by
+
+ * one or more <columns>.
+
+ []
+
+ A <<file header>> consists of:
+
+ * Four bytes, ASCII 'T', 'r', 'v', followed by 1.
+
+ * a <fixed64> indicating the number of rows in the file
+
+ * a <fixed32> indicating the number of columns in the file
+
+ * file <metadata>.
+
+ * for each column, its <column metadata>
+
+ * for each column, its starting position in the file as a <fixed64>.
+
+ []
+
+ A <<column>> consists of:
+
+ * A <fixed32> indicating the number of blocks in this column.
+
+ * For each block, a <block descriptor>
+
+ * One or more <blocks>.
+
+ []
+
+ A <<block descriptor>> consists of:
+
+ * A <fixed32> indicating the number of rows in the block
+
+ * A <fixed32> indicating the size in bytes of the block before the
+ codec is applied (excluding checksum).
+
+ * A <fixed32> indicating the size in bytes of the block after the
+ codec is applied (excluding checksum).
+
+ * If this column's metadata declares it to include values, the first
+ value in the block, serialized according to this column's type.
+
+ []
+
+ A <<block>> consists of:
+
+ * The serialized column values. If a column is an array column then
+ value sequences are preceded by their length, as an <int>. If a
+ codec is specified, the values and lengths are compressed by that
+ codec.
+
+ * The checksum, as determined by the file metadata.
+
+ []
+
+* Codecs
+
+ [null] The "null" codec simply passes data through uncompressed.
+
+ [deflate] The "deflate" codec writes the data block using the
+ deflate algorithm as specified in RFC 1951.
+
+ [snappy] The "snappy" codec uses Google's Snappy compression library.
+
+* Checksum algorithms
+
+ [null] The "null" checksum contains zero bytes.
+
+ [crc-32] Each "crc-32" checksum contains the four bytes of an ISO
+ 3309 CRC-32 checksum of the uncompressed block data as a fixed32.
+
+* Type Mappings
+
+ We define a standard mapping for how types defined in various
+ serialization systems are represented in a column file. Records
+ from these systems are <shredded> into columns. When records are
+ nested, a depth-first recursive walk can assign a separate column
+ for each primitive value.
+
+** Avro
+
+** Protocol Buffers
+
+** Thrift
+
+Implementation Notes
+
+ Some possible techniques for writing column files include:
+
+ [[1]] Use a standard ~100MB block, buffer in memory up to the block
+ size, then flush the file directly to HDFS. A single reduce task
+ might create multiple output files. The namenode requires memory
+ proportional to the number of names and blocks*replication. This
+ would increase the number of names but not blocks, so this should
+ still be much better than a file per column.
+
+ [[1]] Spill each column to a separate local, temporary file then,
+ when the file is closed, append these files, writing a single file
+ to HDFS whose block size is set to be that of the entire file. This
+ would be a bit slower and may have trouble when the local disk
+ is full, but it would better use HDFS namespace and further reduce
+ seeks when processing columns whose values are small.
+
+ [[1]] Use a separate mapreduce job to convert row-major files to
+ column-major. The map would output a (row#, column#,
+ value) tuple, partitioned by row# but sorted by column# then row#.
+ The reducer could directly write the column file. But the column
+ file format would need to be changed to write counts, descriptors,
+ etc. at the end of files rather than at the front.
+
+ []
+
+ (1) is the simplest to implement and most implementations should
+ start with it.
+
+* References
+
+ {CIF} {{{http://arxiv.org/pdf/1105.4252.pdf}<Column-Oriented Storage
+ Techniques for MapReduce>}}, Floratou, Patel, Shekita, & Tata, VLDB
+ 2011.
+
+ {DREMEL} {{{http://research.google.com/pubs/archive/36632.pdf}<Dremel:
+ Interactive Analysis of Web-Scale Datasets>}}, Melnik, Gubarev, Long,
+ Romer, Shivakumar, & Tolton, VLDB 2010.
Propchange: avro/trunk/lang/java/trevni/doc/apt/spec.apt
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/doc/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/doc/pom.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/doc/pom.xml (added)
+++ avro/trunk/lang/java/trevni/doc/pom.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <artifactId>trevni-java</artifactId>
+ <groupId>org.apache.trevni</groupId>
+ <version>1.7.2-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+
+ <groupId>org.apache.trevni</groupId>
+ <artifactId>trevni-doc</artifactId>
+ <version>1.7.2-SNAPSHOT</version>
+
+ <name>Trevni Specification</name>
+ <url>http://avro.apache.org/</url>
+
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-site-plugin</artifactId>
+ <version>${maven-site-plugin.version}</version>
+ <configuration>
+ <generateReports>false</generateReports>
+ <siteDirectory>.</siteDirectory>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
+ </build>
+
+</project>
Propchange: avro/trunk/lang/java/trevni/doc/pom.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/doc/resources/css/site.css
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/doc/resources/css/site.css?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/doc/resources/css/site.css (added)
+++ avro/trunk/lang/java/trevni/doc/resources/css/site.css Tue Sep 11 21:35:56 2012
@@ -0,0 +1,31 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+#banner {
+ height: 93px;
+ background: none;
+}
+
+#bannerLeft img {
+ height: 90px;
+ margin-left: 30px;
+ margin-top: 4px;
+}
+
+#bannerRight img {
+ margin: 17px;
+}
+
Propchange: avro/trunk/lang/java/trevni/doc/resources/css/site.css
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/doc/site.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/doc/site.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/doc/site.xml (added)
+++ avro/trunk/lang/java/trevni/doc/site.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project>
+ <skin>
+ <groupId>org.apache.maven.skins</groupId>
+ <artifactId>maven-stylus-skin</artifactId>
+ <version>1.2</version>
+ </skin>
+ <body>
+ <menu name="Trevni">
+ <item name="Spec" href="spec.html" />
+ </menu>
+ </body>
+</project>
Propchange: avro/trunk/lang/java/trevni/doc/site.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: avro/trunk/lang/java/trevni/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/pom.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/pom.xml (added)
+++ avro/trunk/lang/java/trevni/pom.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <artifactId>avro-parent</artifactId>
+ <groupId>org.apache.avro</groupId>
+ <version>1.7.2-SNAPSHOT</version>
+ <relativePath>../</relativePath>
+ </parent>
+
+ <artifactId>trevni-java</artifactId>
+ <name>Trevni Java</name>
+ <groupId>org.apache.trevni</groupId>
+ <description>Trevni Java</description>
+ <url>http://avro.apache.org/</url>
+ <packaging>pom</packaging>
+
+ <modules>
+ <module>core</module>
+ <module>avro</module>
+ <module>doc</module>
+ </modules>
+
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <failIfNoTests>false</failIfNoTests>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>${compiler-plugin.version}</version>
+ <configuration>
+ <source>1.6</source>
+ <target>1.6</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <version>${checkstyle-plugin.version}</version>
+ <configuration>
+ <consoleOutput>true</consoleOutput>
+ <configLocation>checkstyle.xml</configLocation>
+ </configuration>
+ <executions>
+ <execution>
+ <id>checkstyle-check</id>
+ <phase>test</phase>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+ <profiles>
+ </profiles>
+
+</project>
+
Propchange: avro/trunk/lang/java/trevni/pom.xml
------------------------------------------------------------------------------
svn:eol-style = native
Modified: avro/trunk/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/pom.xml?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/pom.xml (original)
+++ avro/trunk/pom.xml Tue Sep 11 21:35:56 2012
@@ -251,6 +251,9 @@
<copy todir="${avro.docDir}/java">
<fileset dir="lang/java/target/site/apidocs"/>
</copy>
+ <copy todir="build/avro-doc-${project.version}/trevni">
+ <fileset dir="lang/java/trevni/doc/target/site"/>
+ </copy>
</target>
</configuration>
</plugin>