You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/04/19 12:45:25 UTC

svn commit: r935521 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/analysis/tokenattributes/ test/org/apache/lucene/analysis/tokenattributes/

Author: uschindler
Date: Mon Apr 19 10:45:25 2010
New Revision: 935521

URL: http://svn.apache.org/viewvc?rev=935521&view=rev
Log:
LUCENE-2401: Improve CharTermAttribute performance

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java?rev=935521&r1=935520&r2=935521&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java Mon Apr 19 10:45:25 2010
@@ -68,4 +68,24 @@ public interface CharTermAttribute exten
   public CharTermAttribute append(CharSequence csq, int start, int end);
   public CharTermAttribute append(char c);
 
+  /** Appends the specified {@code String} to this character sequence. 
+   * <p>The characters of the {@code String} argument are appended, in order, increasing the length of
+   * this sequence by the length of the argument. If the argument is {@code null}, then the four
+   * characters {@code "null"} are appended. 
+   */
+  public CharTermAttribute append(String s);
+
+  /** Appends the specified {@code StringBuilder} to this character sequence. 
+   * <p>The characters of the {@code StringBuilder} argument are appended, in order, increasing the length of
+   * this sequence by the length of the argument. If the argument is {@code null}, then the four
+   * characters {@code "null"} are appended. 
+   */
+  public CharTermAttribute append(StringBuilder sb);
+
+  /** Appends the contents of the other {@code CharTermAttribute} to this character sequence. 
+   * <p>The characters of the {@code CharTermAttribute} argument are appended, in order, increasing the length of
+   * this sequence by the length of the argument. If the argument is {@code null}, then the four
+   * characters {@code "null"} are appended. 
+   */
+  public CharTermAttribute append(CharTermAttribute termAtt);
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java?rev=935521&r1=935520&r2=935521&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java Mon Apr 19 10:45:25 2010
@@ -41,7 +41,7 @@ public class CharTermAttributeImpl exten
     return new String(termBuffer, 0, termLength);
   }
 
-  public void copyBuffer(char[] buffer, int offset, int length) {
+  public final void copyBuffer(char[] buffer, int offset, int length) {
     growTermBuffer(length);
     System.arraycopy(buffer, offset, termBuffer, 0, length);
     termLength = length;
@@ -69,7 +69,7 @@ public class CharTermAttributeImpl exten
     termLength = length;
   }
 
-  public char[] buffer() {
+  public final char[] buffer() {
     return termBuffer;
   }
 
@@ -78,7 +78,7 @@ public class CharTermAttributeImpl exten
     return termBuffer;
   }
   
-  public char[] resizeBuffer(int newSize) {
+  public final char[] resizeBuffer(int newSize) {
     if(termBuffer.length < newSize){
       // Not big enough; create a new array with slight
       // over allocation and preserve content
@@ -107,14 +107,14 @@ public class CharTermAttributeImpl exten
     return termLength;
   }
 
-  public CharTermAttribute setLength(int length) {
+  public final CharTermAttribute setLength(int length) {
     if (length > termBuffer.length)
       throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
     termLength = length;
     return this;
   }
   
-  public CharTermAttribute setEmpty() {
+  public final CharTermAttribute setEmpty() {
     termLength = 0;
     return this;
   }
@@ -125,7 +125,7 @@ public class CharTermAttributeImpl exten
   }
   
   // *** TermToBytesRefAttribute interface ***
-  public int toBytesRef(BytesRef target) {
+  public final int toBytesRef(BytesRef target) {
     // TODO: Maybe require that bytes is already initialized? TermsHashPerField ensures this.
     if (target.bytes == null) {
       target.bytes = new byte[termLength * 4];
@@ -134,53 +134,109 @@ public class CharTermAttributeImpl exten
   }
   
   // *** CharSequence interface ***
-  public int length() {
+  public final int length() {
     return termLength;
   }
   
-  public char charAt(int index) {
+  public final char charAt(int index) {
     if (index >= termLength)
       throw new IndexOutOfBoundsException();
     return termBuffer[index];
   }
   
-  public CharSequence subSequence(final int start, final int end) {
+  public final CharSequence subSequence(final int start, final int end) {
     if (start > termLength || end > termLength)
       throw new IndexOutOfBoundsException();
     return new String(termBuffer, start, end - start);
   }
   
   // *** Appendable interface ***
-  public CharTermAttribute append(CharSequence csq) {
+
+  public final CharTermAttribute append(CharSequence csq) {
+    if (csq == null) // needed for Appendable compliance
+      return appendNull();
     return append(csq, 0, csq.length());
   }
   
-  public CharTermAttribute append(CharSequence csq, int start, int end) {
-    resizeBuffer(termLength + end - start);
-    if (csq instanceof String) {
-      ((String) csq).getChars(start, end, termBuffer, termLength);
-    } else if (csq instanceof StringBuilder) {
-      ((StringBuilder) csq).getChars(start, end, termBuffer, termLength);
-    } else if (csq instanceof StringBuffer) {
-      ((StringBuffer) csq).getChars(start, end, termBuffer, termLength);
-    } else if (csq instanceof CharBuffer && ((CharBuffer) csq).hasArray()) {
-      final CharBuffer cb = (CharBuffer) csq;
-      System.arraycopy(cb.array(), cb.arrayOffset() + cb.position() + start, termBuffer, termLength, end - start);
+  public final CharTermAttribute append(CharSequence csq, int start, int end) {
+    if (csq == null) // needed for Appendable compliance
+      csq = "null";
+    final int len = end - start, csqlen = csq.length();
+    if (len < 0 || start > csqlen || end > csqlen)
+      throw new IndexOutOfBoundsException();
+    if (len == 0)
+      return this;
+    resizeBuffer(termLength + len);
+    if (len > 4) { // only use instanceof check series for longer CSQs, else simply iterate
+      if (csq instanceof String) {
+        ((String) csq).getChars(start, end, termBuffer, termLength);
+      } else if (csq instanceof StringBuilder) {
+        ((StringBuilder) csq).getChars(start, end, termBuffer, termLength);
+      } else if (csq instanceof CharTermAttribute) {
+        System.arraycopy(((CharTermAttribute) csq).buffer(), start, termBuffer, termLength, len);
+      } else if (csq instanceof CharBuffer && ((CharBuffer) csq).hasArray()) {
+        final CharBuffer cb = (CharBuffer) csq;
+        System.arraycopy(cb.array(), cb.arrayOffset() + cb.position() + start, termBuffer, termLength, len);
+      } else if (csq instanceof StringBuffer) {
+        ((StringBuffer) csq).getChars(start, end, termBuffer, termLength);
+      } else {
+        while (start < end)
+          termBuffer[termLength++] = csq.charAt(start++);
+        // no fall-through here, as termLength is updated!
+        return this;
+      }
+      termLength += len;
+      return this;
     } else {
       while (start < end)
         termBuffer[termLength++] = csq.charAt(start++);
-      // no fall-through here, as termLength is updated!
       return this;
     }
-    termLength += end - start;
-    return this;
   }
   
-  public CharTermAttribute append(char c) {
+  public final CharTermAttribute append(char c) {
     resizeBuffer(termLength + 1)[termLength++] = c;
     return this;
   }
   
+  // *** For performance some convenience methods in addition to CSQ's ***
+  
+  public final CharTermAttribute append(String s) {
+    if (s == null) // needed for Appendable compliance
+      return appendNull();
+    final int len = s.length();
+    s.getChars(0, len, resizeBuffer(termLength + len), termLength);
+    termLength += len;
+    return this;
+  }
+  
+  public final CharTermAttribute append(StringBuilder s) {
+    if (s == null) // needed for Appendable compliance
+      return appendNull();
+    final int len = s.length();
+    s.getChars(0, len, resizeBuffer(termLength + len), termLength);
+    termLength += len;
+    return this;
+  }
+  
+  public final CharTermAttribute append(CharTermAttribute ta) {
+    if (ta == null) // needed for Appendable compliance
+      return appendNull();
+    final int len = ta.length();
+    System.arraycopy(ta.buffer(), 0, resizeBuffer(termLength + len), termLength, len);
+    termLength += len;
+    return this;
+  }
+
+  private CharTermAttribute appendNull() {
+    resizeBuffer(termLength + 4);
+    termBuffer[termLength++] = 'n';
+    termBuffer[termLength++] = 'u';
+    termBuffer[termLength++] = 'l';
+    termBuffer[termLength++] = 'l';
+    return this;
+  }
+  
   // *** AttributeImpl ***
 
   @Override

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java?rev=935521&r1=935520&r2=935521&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java Mon Apr 19 10:45:25 2010
@@ -22,6 +22,7 @@ import java.nio.CharBuffer;
 import java.util.Formatter;
 import java.util.Locale;
 import java.util.regex.Pattern;
+import java.util.Random;
 
 public class TestCharTermAttributeImpl extends LuceneTestCase {
 
@@ -157,21 +158,155 @@ public class TestCharTermAttributeImpl e
     assertEquals("12345678", t.toString());
     t.append('9');
     assertEquals("123456789", t.toString());
-    t.append("0");
+    t.append((CharSequence) "0");
     assertEquals("1234567890", t.toString());
-    t.append("0123456789", 1, 3);
+    t.append((CharSequence) "0123456789", 1, 3);
     assertEquals("123456789012", t.toString());
-    t.append(CharBuffer.wrap("0123456789".toCharArray()), 3, 5);
+    t.append((CharSequence) CharBuffer.wrap("0123456789".toCharArray()), 3, 5);
     assertEquals("12345678901234", t.toString());
-    t.append(t);
+    t.append((CharSequence) t);
     assertEquals("1234567890123412345678901234", t.toString());
-    t.append(new StringBuilder("0123456789"), 5, 7);
+    t.append((CharSequence) new StringBuilder("0123456789"), 5, 7);
     assertEquals("123456789012341234567890123456", t.toString());
-    t.append(new StringBuffer(t));
+    t.append((CharSequence) new StringBuffer(t));
     assertEquals("123456789012341234567890123456123456789012341234567890123456", t.toString());
     // very wierd, to test if a subSlice is wrapped correct :)
-    t.setEmpty().append(CharBuffer.wrap("0123456789".toCharArray(), 3, 5) /* "34" */, 1, 2);
+    CharBuffer buf = CharBuffer.wrap("0123456789".toCharArray(), 3, 5);
+    assertEquals("34567", buf.toString());
+    t.setEmpty().append((CharSequence) buf, 1, 2);
     assertEquals("4", t.toString());
+    CharTermAttribute t2 = new CharTermAttributeImpl();
+    t2.append("test");
+    t.append((CharSequence) t2);
+    assertEquals("4test", t.toString());
+    t.append((CharSequence) t2, 1, 2);
+    assertEquals("4teste", t.toString());
+    
+    try {
+      t.append((CharSequence) t2, 1, 5);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+    
+    try {
+      t.append((CharSequence) t2, 1, 0);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+    
+    t.append((CharSequence) null);
+    assertEquals("4testenull", t.toString());
+  }
+  
+  public void testAppendableInterfaceWithLongSequences() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    t.append((CharSequence) "01234567890123456789012345678901234567890123456789");
+    t.append((CharSequence) CharBuffer.wrap("01234567890123456789012345678901234567890123456789".toCharArray()), 3, 50);
+    assertEquals("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", t.toString());
+    t.setEmpty().append((CharSequence) new StringBuilder("01234567890123456789"), 5, 17);
+    assertEquals((CharSequence) "567890123456", t.toString());
+    t.append(new StringBuffer(t));
+    assertEquals((CharSequence) "567890123456567890123456", t.toString());
+    // very weird, to test if a subSlice is wrapped correctly :)
+    CharBuffer buf = CharBuffer.wrap("012345678901234567890123456789".toCharArray(), 3, 15);
+    assertEquals("345678901234567", buf.toString());
+    t.setEmpty().append(buf, 1, 14);
+    assertEquals("4567890123456", t.toString());
+    
+    // finally use a completely custom CharSequence that is not caught by the instanceof checks
+    final String longTestString = "012345678901234567890123456789";
+    t.append(new CharSequence() {
+      public char charAt(int i) { return longTestString.charAt(i); }
+      public int length() { return longTestString.length(); }
+      public CharSequence subSequence(int start, int end) { return longTestString.subSequence(start, end); }
+      public String toString() { return longTestString; }
+    });
+    assertEquals("4567890123456"+longTestString, t.toString());
+  }
+  
+  public void testNonCharSequenceAppend() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    t.append("0123456789");
+    t.append("0123456789");
+    assertEquals("01234567890123456789", t.toString());
+    t.append(new StringBuilder("0123456789"));
+    assertEquals("012345678901234567890123456789", t.toString());
+    CharTermAttribute t2 = new CharTermAttributeImpl();
+    t2.append("test");
+    t.append(t2);
+    assertEquals("012345678901234567890123456789test", t.toString());
+    t.append((String) null);
+    t.append((StringBuilder) null);
+    t.append((CharTermAttribute) null);
+    assertEquals("012345678901234567890123456789testnullnullnull", t.toString());
+  }
+  
+  public void testExceptions() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    t.append("test");
+    assertEquals("test", t.toString());
+
+    try {
+      t.charAt(-1);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+
+    try {
+      t.charAt(4);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+
+    try {
+      t.subSequence(0, 5);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+
+    try {
+      t.subSequence(5, 0);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+  }
+
+  /*
+  
+  // test speed of the dynamic instanceof checks in append(CharSequence),
+  // to find the best max length for the generic while (start<end) loop:
+  public void testAppendPerf() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    final int count = 32;
+    CharSequence[] csq = new CharSequence[count * 6];
+    final StringBuilder sb = new StringBuilder();
+    for (int i=0,j=0; i<count; i++) {
+      sb.append(i%10);
+      final String testString = sb.toString();
+      CharTermAttribute cta = new CharTermAttributeImpl();
+      cta.append(testString);
+      csq[j++] = cta;
+      csq[j++] = testString;
+      csq[j++] = new StringBuilder(sb);
+      csq[j++] = new StringBuffer(sb);
+      csq[j++] = CharBuffer.wrap(testString.toCharArray());
+      csq[j++] = new CharSequence() {
+        public char charAt(int i) { return testString.charAt(i); }
+        public int length() { return testString.length(); }
+        public CharSequence subSequence(int start, int end) { return testString.subSequence(start, end); }
+        public String toString() { return testString; }
+      };
+    }
+
+    Random rnd = newRandom();
+    long startTime = System.currentTimeMillis();
+    for (int i=0; i<100000000; i++) {
+      t.setEmpty().append(csq[rnd.nextInt(csq.length)]);
+    }
+    long endTime = System.currentTimeMillis();
+    System.out.println("Time: " + (endTime-startTime)/1000.0 + " s");
   }
   
+  */
+
 }