You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2013/03/28 00:42:31 UTC
svn commit: r1461896 - in /avro/trunk: ./
lang/java/trevni/core/src/main/java/org/apache/trevni/
lang/java/trevni/doc/apt/
Author: cutting
Date: Wed Mar 27 23:42:30 2013
New Revision: 1461896
URL: http://svn.apache.org/r1461896
Log:
AVRO-1259. Java: Improve Trevni's encoding of sparse columns.
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java
avro/trunk/lang/java/trevni/doc/apt/spec.apt
Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1461896&r1=1461895&r2=1461896&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Wed Mar 27 23:42:30 2013
@@ -19,6 +19,9 @@ Trunk (not yet released)
types BigDecimal, BigInteger, URI, URL, Date and File can now be
fields in generated classes. (Alexandre Normand and cutting)
+ AVRO-1259. Java: Improve Trevni's encoding of sparse columns.
+ (cutting)
+
BUG FIXES
Avro 1.7.4 (22 February 2012)
Modified: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java?rev=1461896&r1=1461895&r2=1461896&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java (original)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java Wed Mar 27 23:42:30 2013
@@ -23,6 +23,11 @@ import java.io.IOException;
class ArrayColumnOutputBuffer extends ColumnOutputBuffer {
private int length; // remaining in current array
+ private static final int NONE = -1;
+
+ private int runLength; // length of current run
+ private int runValue = NONE; // what kind of run
+
public ArrayColumnOutputBuffer(ColumnFileWriter writer, ColumnMetaData meta)
throws IOException {
super(writer, meta);
@@ -30,16 +35,47 @@ class ArrayColumnOutputBuffer extends Co
assert !getMeta().hasIndexValues();
}
- @Override public void writeLength(int length) throws IOException {
+ @Override public void writeLength(int l) throws IOException {
assert this.length == 0;
- this.length = length;
- getBuffer().writeLength(length);
+ assert l >= 0;
+ this.length = l;
+ if (l == runValue) {
+ runLength++; // continue a run
+ return;
+ }
+ flushRun(); // end a run
+ if (l == 1 || l == 0) {
+ runLength = 1; // start a run
+ runValue = l;
+ } else {
+ getBuffer().writeLength(l); // not a run
+ }
}
@Override public void writeValue(Object value) throws IOException {
assert length > 0;
- getBuffer().writeValue(value, getMeta().getType());
+ if (getMeta().getType() != ValueType.NULL) {
+ flushRun();
+ getBuffer().writeValue(value, getMeta().getType());
+ }
length -= 1;
}
+ @Override void flushBuffer() throws IOException {
+ flushRun();
+ super.flushBuffer();
+ }
+
+ private void flushRun() throws IOException {
+ if (runLength == 0) // not in run
+ return;
+ else if (runLength == 1) // single value
+ getBuffer().writeLength(runValue);
+ else // a run
+ getBuffer().writeLength((3-runValue)-(runLength<<1));
+
+ runLength = 0; // reset
+ runValue = NONE;
+ }
+
}
Modified: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java?rev=1461896&r1=1461895&r2=1461896&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java (original)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java Wed Mar 27 23:42:30 2013
@@ -111,6 +111,7 @@ public class ColumnFileReader implements
throw new IOException("Not a data file.");
}
if (!(Arrays.equals(ColumnFileWriter.MAGIC, magic)
+ || !Arrays.equals(ColumnFileWriter.MAGIC_1, magic)
|| !Arrays.equals(ColumnFileWriter.MAGIC_0, magic)))
throw new IOException("Not a data file.");
}
Modified: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java?rev=1461896&r1=1461895&r2=1461896&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java (original)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java Wed Mar 27 23:42:30 2013
@@ -30,7 +30,8 @@ import java.util.HashSet;
public class ColumnFileWriter {
static final byte[] MAGIC_0 = new byte[] {'T', 'r', 'v', 0};
- static final byte[] MAGIC = new byte[] {'T', 'r', 'v', 1};
+ static final byte[] MAGIC_1 = new byte[] {'T', 'r', 'v', 1};
+ static final byte[] MAGIC = new byte[] {'T', 'r', 'v', 2};
private ColumnFileMetaData metaData;
private ColumnOutputBuffer[] columns;
Modified: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java?rev=1461896&r1=1461895&r2=1461896&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java (original)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java Wed Mar 27 23:42:30 2013
@@ -70,7 +70,7 @@ class ColumnOutputBuffer {
rowCount++;
}
- private void flushBuffer() throws IOException {
+ void flushBuffer() throws IOException {
if (rowCount == 0) return;
ByteBuffer raw = buffer.asByteBuffer();
ByteBuffer c = codec.compress(raw);
Modified: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java?rev=1461896&r1=1461895&r2=1461896&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java (original)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java Wed Mar 27 23:42:30 2013
@@ -38,6 +38,9 @@ class InputBuffer {
private int bitCount; // position in booleans
+ private int runLength; // length of run
+ private int runValue; // value of run
+
public InputBuffer(Input in) throws IOException { this(in, 0); }
public InputBuffer(Input in, long position) throws IOException {
@@ -56,6 +59,7 @@ class InputBuffer {
}
public void seek(long position) throws IOException {
+ runLength = 0;
if (position >= (offset-limit) && position <= offset) {
pos = (int)(limit - (offset - position)); // seek in buffer;
return;
@@ -133,7 +137,18 @@ class InputBuffer {
public int readLength() throws IOException {
bitCount = 0;
- return readInt();
+ if (runLength > 0) {
+ runLength--; // in run
+ return runValue;
+ }
+
+ int length = readInt();
+ if (length >= 0) // not a run
+ return length;
+
+ runLength = (1-length)>>>1; // start of run
+ runValue = (length+1) & 1;
+ return runValue;
}
public int readInt() throws IOException {
Modified: avro/trunk/lang/java/trevni/doc/apt/spec.apt
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/doc/apt/spec.apt?rev=1461896&r1=1461895&r2=1461896&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/doc/apt/spec.apt (original)
+++ avro/trunk/lang/java/trevni/doc/apt/spec.apt Wed Mar 27 23:42:30 2013
@@ -261,7 +261,9 @@ whether subsequent bytes are present. F
* <<trevni.array>> if present, indicates that each row in this column
contains a sequence of values of the named type rather than just a
single value. An integer length precedes each sequence of values
- indicating the count of values in the sequence.
+ indicating the count of values in the sequence. If the length is
+ negative then it encodes a run of repeated lengths, all zeros or all ones, where -1
+ indicates two zeros, -2 two ones, -3 three zeros, -4 three ones, etc.
* <<trevni.parent>> if present, the name of an <array> column whose
lengths are also used by this column. Thus values of this column
@@ -348,7 +350,7 @@ name=value type=string par
A <<file header>> consists of:
- * Four bytes, ASCII 'T', 'r', 'v', followed by 1.
+ * Four bytes, ASCII 'T', 'r', 'v', followed by 0x02.
* a <fixed64> indicating the number of rows in the file