You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2010/09/21 21:23:25 UTC

svn commit: r999556 - in /avro/trunk: ./ lang/java/src/java/org/apache/avro/generic/ lang/java/src/java/org/apache/avro/io/ lang/java/src/java/org/apache/avro/util/

Author: cutting
Date: Tue Sep 21 19:23:24 2010
New Revision: 999556

URL: http://svn.apache.org/viewvc?rev=999556&view=rev
Log:
AVRO-668.  Java: Reduce object allocations while writing strings.  Contributed by scottcarey.

Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java
    avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java
    avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java
    avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java
    avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java
    avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java
    avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Sep 21 19:23:24 2010
@@ -23,6 +23,9 @@ Avro 1.4.1 (unreleased)
 
     AVRO-634. IDL: Add support for aliases. (cutting)
 
+    AVRO-668. Java: Reduce object allocations while writing strings.
+    (scottcarey)
+
   BUG FIXES
 
     AVRO-666. Remove an extraneous pdb.set_trace() that crept into schema.py

Modified: avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumWriter.java Tue Sep 21 19:23:24 2010
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Map.Entry;
 import java.util.Collection;
 
 import org.apache.avro.AvroTypeException;
@@ -29,7 +28,6 @@ import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.io.DatumWriter;
 import org.apache.avro.io.Encoder;
-import org.apache.avro.util.Utf8;
 
 /** {@link DatumWriter} for generic Java objects. */
 public class GenericDatumWriter<D> implements DatumWriter<D> {
@@ -188,9 +186,7 @@ public class GenericDatumWriter<D> imple
   /** Called to write a string.  May be overridden for alternate string
    * representations.*/
   protected void writeString(Object datum, Encoder out) throws IOException {
-    out.writeString(datum instanceof Utf8
-                    ? (Utf8)datum
-                    : new Utf8(datum.toString()));
+    out.writeString((CharSequence) datum);
   }
 
   /** Called to write a bytes.  May be overridden for alternate bytes

Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/BinaryEncoder.java Tue Sep 21 19:23:24 2010
@@ -130,10 +130,20 @@ public class BinaryEncoder extends Encod
 
   @Override
   public void writeString(Utf8 utf8) throws IOException {
-    encodeLong(utf8.getLength(), out);
-    out.write(utf8.getBytes(), 0, utf8.getLength());
+    encodeString(utf8.getBytes(), 0, utf8.getByteLength());
   }
-
+  
+  @Override
+  public void writeString(String string) throws IOException {
+    byte[] bytes = Utf8.getBytesFor(string);
+    encodeString(bytes, 0, bytes.length);
+  }
+  
+  private void encodeString(byte[] bytes, int offset, int length) throws IOException {
+    encodeLong(length, out);
+    out.write(bytes, offset, length);
+  }
+  
   @Override
   public void writeBytes(ByteBuffer bytes) throws IOException {
     byteWriter.write(bytes);

Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/BlockingBinaryEncoder.java Tue Sep 21 19:23:24 2010
@@ -85,7 +85,7 @@ public class BlockingBinaryEncoder exten
        *  _containing_ this block must be in the {@link #OVERFLOW}
        *  state. */
      OVERFLOW
-    };
+    }
 
     /** The type of this blocked value (ARRAY or MAP). */
     public Schema.Type type;
@@ -313,9 +313,15 @@ public class BlockingBinaryEncoder exten
 
   @Override
   public void writeString(Utf8 utf8) throws IOException {
-    writeBytes(utf8.getBytes(), 0, utf8.getLength());
-
-    assert check();
+    writeBytes(utf8.getBytes(), 0, utf8.getByteLength());
+    // no explicit "assert check()" here: writeBytes already performs it
+  }
+  
+  @Override
+  public void writeString(String str) throws IOException {
+    byte[] utf8bytes = Utf8.getBytesFor(str);
+    writeBytes(utf8bytes, 0, utf8bytes.length);
+    // no explicit "assert check()" here: writeBytes already performs it
   }
 
   @Override

Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/Encoder.java Tue Sep 21 19:23:24 2010
@@ -98,7 +98,9 @@ public abstract class Encoder implements
   public abstract void writeString(Utf8 utf8) throws IOException;
 
   /**
-   * Write a Unicode character string.
+   * Write a Unicode character string.  The default implementation converts
+   * the String to a {@link org.apache.avro.util.Utf8}.  Encoder implementations
+   * may override this to encode the String directly as a performance optimization.
    * @throws AvroTypeException If this is a stateful writer and a
    * char-string is not expected
    */
@@ -107,6 +109,20 @@ public abstract class Encoder implements
   }
 
   /**
+   * Write a Unicode character string.  If the CharSequence is an
+   * {@link org.apache.avro.util.Utf8} it writes this directly, otherwise
+   * the CharSequence is converted to a String via toString() and written.
+   * @throws AvroTypeException If this is a stateful writer and a
+   * char-string is not expected
+   */
+  public void writeString(CharSequence charSequence) throws IOException {
+    if (charSequence instanceof Utf8)
+      writeString((Utf8)charSequence);
+    else
+      writeString(charSequence.toString());
+  }
+  
+  /**
    * Write a byte string.
    * @throws AvroTypeException If this is a stateful writer and a
    *         byte-string is not expected

Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/JsonEncoder.java Tue Sep 21 19:23:24 2010
@@ -109,12 +109,17 @@ public class JsonEncoder extends Parsing
 
   @Override
   public void writeString(Utf8 utf8) throws IOException {
+    writeString(utf8.toString());
+  }
+  
+  @Override 
+  public void writeString(String str) throws IOException {
     parser.advance(Symbol.STRING);
     if (parser.topSymbol() == Symbol.MAP_KEY_MARKER) {
       parser.advance(Symbol.MAP_KEY_MARKER);
-      out.writeFieldName(utf8.toString());
+      out.writeFieldName(str);
     } else {
-      out.writeString(utf8.toString());
+      out.writeString(str);
     }
   }
 

Modified: avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/io/ValidatingEncoder.java Tue Sep 21 19:23:24 2010
@@ -105,6 +105,18 @@ public class ValidatingEncoder extends P
     parser.advance(Symbol.STRING);
     out.writeString(utf8);
   }
+  
+  @Override
+  public void writeString(String str) throws IOException {
+    parser.advance(Symbol.STRING);
+    out.writeString(str);
+  }
+  
+  @Override
+  public void writeString(CharSequence charSequence) throws IOException {
+    parser.advance(Symbol.STRING);
+    out.writeString(charSequence);
+  }
 
   @Override
   public void writeBytes(ByteBuffer bytes) throws IOException {

Modified: avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java?rev=999556&r1=999555&r2=999556&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/util/Utf8.java Tue Sep 21 19:23:24 2010
@@ -18,6 +18,7 @@
 package org.apache.avro.util;
 
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
 
 import org.apache.avro.io.BinaryData;
 
@@ -27,6 +28,10 @@ import org.apache.avro.io.BinaryData;
 public class Utf8 implements Comparable<Utf8>, CharSequence {
   private static final byte[] EMPTY = new byte[0];
 
+  private static final Charset UTF8_CS;
+  static {
+    UTF8_CS = Charset.forName("UTF-8");
+  }
   private byte[] bytes = EMPTY;
   private int length;
   private String string;
@@ -34,11 +39,7 @@ public class Utf8 implements Comparable<
   public Utf8() {}
 
   public Utf8(String string) {
-    try {
-      this.bytes = string.getBytes("UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException(e);
-    }
+    this.bytes = string.getBytes(UTF8_CS);
     this.length = bytes.length;
     this.string = string;
   }
@@ -79,6 +80,7 @@ public class Utf8 implements Comparable<
     return this;
   }
 
+  @Override
   public String toString() {
     if (this.string == null)
       try {
@@ -89,6 +91,7 @@ public class Utf8 implements Comparable<
     return this.string;
   }
 
+  @Override
   public boolean equals(Object o) {
     if (o == this) return true;
     if (!(o instanceof Utf8)) return false;
@@ -101,6 +104,7 @@ public class Utf8 implements Comparable<
     return true;
   }
 
+  @Override
   public int hashCode() {
     int hash = 0;
     for (int i = 0; i < this.length; i++)
@@ -120,5 +124,9 @@ public class Utf8 implements Comparable<
     return toString().subSequence(start, end);
   }
 
+  /** Returns the UTF-8 encoded bytes of the given String. */
+  public static byte[] getBytesFor(String str) {
+    return str.getBytes(UTF8_CS);
+  }
 
 }