You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2010/09/21 21:23:25 UTC
svn commit: r999556 - in /avro/trunk: ./
lang/java/src/java/org/apache/avro/generic/
lang/java/src/java/org/apache/avro/io/
lang/java/src/java/org/apache/avro/util/
Author: cutting
Date: Tue Sep 21 19:23:24 2010
New Revision: 999556
URL: http://svn.apache.org/viewvc?rev=999556&view=rev
Log:
AVRO-668. Java: Reduce object allocations while writing strings. Contributed by scottcarey.
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java
avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java
avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java
avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java
avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java
avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java
avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java
Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Sep 21 19:23:24 2010
@@ -23,6 +23,9 @@ Avro 1.4.1 (unreleased)
AVRO-634. IDL: Add support for aliases. (cutting)
+ AVRO-668. Java: Reduce object allocations while writing strings.
+ (scottcarey)
+
BUG FIXES
AVRO-666. Remove an extraneous pdb.set_trace() that crept into schema.py
Modified: avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java Tue Sep 21 19:23:24 2010
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.Map;
-import java.util.Map.Entry;
import java.util.Collection;
import org.apache.avro.AvroTypeException;
@@ -29,7 +28,6 @@ import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Encoder;
-import org.apache.avro.util.Utf8;
/** {@link DatumWriter} for generic Java objects. */
public class GenericDatumWriter<D> implements DatumWriter<D> {
@@ -188,9 +186,7 @@ public class GenericDatumWriter<D> imple
/** Called to write a string. May be overridden for alternate string
* representations.*/
protected void writeString(Object datum, Encoder out) throws IOException {
- out.writeString(datum instanceof Utf8
- ? (Utf8)datum
- : new Utf8(datum.toString()));
+ out.writeString((CharSequence) datum);
}
/** Called to write a bytes. May be overridden for alternate bytes
Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java Tue Sep 21 19:23:24 2010
@@ -130,10 +130,20 @@ public class BinaryEncoder extends Encod
@Override
public void writeString(Utf8 utf8) throws IOException {
- encodeLong(utf8.getLength(), out);
- out.write(utf8.getBytes(), 0, utf8.getLength());
+ encodeString(utf8.getBytes(), 0, utf8.getByteLength());
}
-
+
+ @Override
+ public void writeString(String string) throws IOException {
+ byte[] bytes = Utf8.getBytesFor(string);
+ encodeString(bytes, 0, bytes.length);
+ }
+
+ private void encodeString(byte[] bytes, int offset, int length) throws IOException {
+ encodeLong(length, out);
+ out.write(bytes, offset, length);
+ }
+
@Override
public void writeBytes(ByteBuffer bytes) throws IOException {
byteWriter.write(bytes);
Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java Tue Sep 21 19:23:24 2010
@@ -85,7 +85,7 @@ public class BlockingBinaryEncoder exten
* _containing_ this block must be in the {@link #OVERFLOW}
* state. */
OVERFLOW
- };
+ }
/** The type of this blocked value (ARRAY or MAP). */
public Schema.Type type;
@@ -313,9 +313,15 @@ public class BlockingBinaryEncoder exten
@Override
public void writeString(Utf8 utf8) throws IOException {
- writeBytes(utf8.getBytes(), 0, utf8.getLength());
-
- assert check();
+ writeBytes(utf8.getBytes(), 0, utf8.getByteLength());
+ // assert called in writeBytes
+ }
+
+ @Override
+ public void writeString(String str) throws IOException {
+ byte[] utf8bytes = Utf8.getBytesFor(str);
+ writeBytes(utf8bytes, 0, utf8bytes.length);
+ // assert called in writeBytes
}
@Override
Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java Tue Sep 21 19:23:24 2010
@@ -98,7 +98,9 @@ public abstract class Encoder implements
public abstract void writeString(Utf8 utf8) throws IOException;
/**
- * Write a Unicode character string.
+ * Write a Unicode character string. The default implementation converts
+ * the String to a {@link org.apache.avro.util.Utf8}. Encoder implementations
+ * may override this method to encode the String directly and avoid that conversion.
* @throws AvroTypeException If this is a stateful writer and a
* char-string is not expected
*/
@@ -107,6 +109,20 @@ public abstract class Encoder implements
}
/**
+ * Write a Unicode character string. If the CharSequence is an
+ * {@link org.apache.avro.util.Utf8} it writes this directly, otherwise
+ * the CharSequence is converted to a String via toString() and written.
+ * @throws AvroTypeException If this is a stateful writer and a
+ * char-string is not expected
+ */
+ public void writeString(CharSequence charSequence) throws IOException {
+ if (charSequence instanceof Utf8)
+ writeString((Utf8)charSequence);
+ else
+ writeString(charSequence.toString());
+ }
+
+ /**
* Write a byte string.
* @throws AvroTypeException If this is a stateful writer and a
* byte-string is not expected
Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java Tue Sep 21 19:23:24 2010
@@ -109,12 +109,17 @@ public class JsonEncoder extends Parsing
@Override
public void writeString(Utf8 utf8) throws IOException {
+ writeString(utf8.toString());
+ }
+
+ @Override
+ public void writeString(String str) throws IOException {
parser.advance(Symbol.STRING);
if (parser.topSymbol() == Symbol.MAP_KEY_MARKER) {
parser.advance(Symbol.MAP_KEY_MARKER);
- out.writeFieldName(utf8.toString());
+ out.writeFieldName(str);
} else {
- out.writeString(utf8.toString());
+ out.writeString(str);
}
}
Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java Tue Sep 21 19:23:24 2010
@@ -105,6 +105,18 @@ public class ValidatingEncoder extends P
parser.advance(Symbol.STRING);
out.writeString(utf8);
}
+
+ @Override
+ public void writeString(String str) throws IOException {
+ parser.advance(Symbol.STRING);
+ out.writeString(str);
+ }
+
+ @Override
+ public void writeString(CharSequence charSequence) throws IOException {
+ parser.advance(Symbol.STRING);
+ out.writeString(charSequence);
+ }
@Override
public void writeBytes(ByteBuffer bytes) throws IOException {
Modified: avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java Tue Sep 21 19:23:24 2010
@@ -18,6 +18,7 @@
package org.apache.avro.util;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
import org.apache.avro.io.BinaryData;
@@ -27,6 +28,10 @@ import org.apache.avro.io.BinaryData;
public class Utf8 implements Comparable<Utf8>, CharSequence {
private static final byte[] EMPTY = new byte[0];
+ private static final Charset UTF8_CS;
+ static {
+ UTF8_CS = Charset.forName("UTF-8");
+ }
private byte[] bytes = EMPTY;
private int length;
private String string;
@@ -34,11 +39,7 @@ public class Utf8 implements Comparable<
public Utf8() {}
public Utf8(String string) {
- try {
- this.bytes = string.getBytes("UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
+ this.bytes = string.getBytes(UTF8_CS);
this.length = bytes.length;
this.string = string;
}
@@ -79,6 +80,7 @@ public class Utf8 implements Comparable<
return this;
}
+ @Override
public String toString() {
if (this.string == null)
try {
@@ -89,6 +91,7 @@ public class Utf8 implements Comparable<
return this.string;
}
+ @Override
public boolean equals(Object o) {
if (o == this) return true;
if (!(o instanceof Utf8)) return false;
@@ -101,6 +104,7 @@ public class Utf8 implements Comparable<
return true;
}
+ @Override
public int hashCode() {
int hash = 0;
for (int i = 0; i < this.length; i++)
@@ -120,5 +124,9 @@ public class Utf8 implements Comparable<
return toString().subSequence(start, end);
}
+ /** Gets the UTF-8 bytes for a String */
+ public static byte[] getBytesFor(String str) {
+ return str.getBytes(UTF8_CS);
+ }
}