You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/04/19 12:45:25 UTC
svn commit: r935521 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/analysis/tokenattributes/
test/org/apache/lucene/analysis/tokenattributes/
Author: uschindler
Date: Mon Apr 19 10:45:25 2010
New Revision: 935521
URL: http://svn.apache.org/viewvc?rev=935521&view=rev
Log:
LUCENE-2401: Improve CharTermAttribute performance
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java?rev=935521&r1=935520&r2=935521&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java Mon Apr 19 10:45:25 2010
@@ -68,4 +68,24 @@ public interface CharTermAttribute exten
public CharTermAttribute append(CharSequence csq, int start, int end);
public CharTermAttribute append(char c);
+ /** Appends the specified {@code String} to this character sequence.
+ * <p>The characters of the {@code String} argument are appended, in order, increasing the length of
+ * this sequence by the length of the argument. If the argument is {@code null}, then the four
+ * characters {@code "null"} are appended.
+ */
+ public CharTermAttribute append(String s);
+
+ /** Appends the specified {@code StringBuilder} to this character sequence.
+ * <p>The characters of the {@code StringBuilder} argument are appended, in order, increasing the length of
+ * this sequence by the length of the argument. If the argument is {@code null}, then the four
+ * characters {@code "null"} are appended.
+ */
+ public CharTermAttribute append(StringBuilder sb);
+
+ /** Appends the contents of the other {@code CharTermAttribute} to this character sequence.
+ * <p>The characters of the {@code CharTermAttribute} argument are appended, in order, increasing the length of
+ * this sequence by the length of the argument. If the argument is {@code null}, then the four
+ * characters {@code "null"} are appended.
+ */
+ public CharTermAttribute append(CharTermAttribute termAtt);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java?rev=935521&r1=935520&r2=935521&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java Mon Apr 19 10:45:25 2010
@@ -41,7 +41,7 @@ public class CharTermAttributeImpl exten
return new String(termBuffer, 0, termLength);
}
- public void copyBuffer(char[] buffer, int offset, int length) {
+ public final void copyBuffer(char[] buffer, int offset, int length) {
growTermBuffer(length);
System.arraycopy(buffer, offset, termBuffer, 0, length);
termLength = length;
@@ -69,7 +69,7 @@ public class CharTermAttributeImpl exten
termLength = length;
}
- public char[] buffer() {
+ public final char[] buffer() {
return termBuffer;
}
@@ -78,7 +78,7 @@ public class CharTermAttributeImpl exten
return termBuffer;
}
- public char[] resizeBuffer(int newSize) {
+ public final char[] resizeBuffer(int newSize) {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
// over allocation and preserve content
@@ -107,14 +107,14 @@ public class CharTermAttributeImpl exten
return termLength;
}
- public CharTermAttribute setLength(int length) {
+ public final CharTermAttribute setLength(int length) {
if (length > termBuffer.length)
throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
termLength = length;
return this;
}
- public CharTermAttribute setEmpty() {
+ public final CharTermAttribute setEmpty() {
termLength = 0;
return this;
}
@@ -125,7 +125,7 @@ public class CharTermAttributeImpl exten
}
// *** TermToBytesRefAttribute interface ***
- public int toBytesRef(BytesRef target) {
+ public final int toBytesRef(BytesRef target) {
// TODO: Maybe require that bytes is already initialized? TermsHashPerField ensures this.
if (target.bytes == null) {
target.bytes = new byte[termLength * 4];
@@ -134,53 +134,109 @@ public class CharTermAttributeImpl exten
}
// *** CharSequence interface ***
- public int length() {
+ public final int length() {
return termLength;
}
- public char charAt(int index) {
+ public final char charAt(int index) {
if (index >= termLength)
throw new IndexOutOfBoundsException();
return termBuffer[index];
}
- public CharSequence subSequence(final int start, final int end) {
+ public final CharSequence subSequence(final int start, final int end) {
if (start > termLength || end > termLength)
throw new IndexOutOfBoundsException();
return new String(termBuffer, start, end - start);
}
// *** Appendable interface ***
- public CharTermAttribute append(CharSequence csq) {
+
+ public final CharTermAttribute append(CharSequence csq) {
+ if (csq == null) // needed for Appendable compliance
+ return appendNull();
return append(csq, 0, csq.length());
}
- public CharTermAttribute append(CharSequence csq, int start, int end) {
- resizeBuffer(termLength + end - start);
- if (csq instanceof String) {
- ((String) csq).getChars(start, end, termBuffer, termLength);
- } else if (csq instanceof StringBuilder) {
- ((StringBuilder) csq).getChars(start, end, termBuffer, termLength);
- } else if (csq instanceof StringBuffer) {
- ((StringBuffer) csq).getChars(start, end, termBuffer, termLength);
- } else if (csq instanceof CharBuffer && ((CharBuffer) csq).hasArray()) {
- final CharBuffer cb = (CharBuffer) csq;
- System.arraycopy(cb.array(), cb.arrayOffset() + cb.position() + start, termBuffer, termLength, end - start);
+ public final CharTermAttribute append(CharSequence csq, int start, int end) {
+ if (csq == null) // needed for Appendable compliance
+ csq = "null";
+ final int len = end - start, csqlen = csq.length();
+ if (len < 0 || start > csqlen || end > csqlen)
+ throw new IndexOutOfBoundsException();
+ if (len == 0)
+ return this;
+ resizeBuffer(termLength + len);
+ if (len > 4) { // only use instanceof check series for longer CSQs, else simply iterate
+ if (csq instanceof String) {
+ ((String) csq).getChars(start, end, termBuffer, termLength);
+ } else if (csq instanceof StringBuilder) {
+ ((StringBuilder) csq).getChars(start, end, termBuffer, termLength);
+ } else if (csq instanceof CharTermAttribute) {
+ System.arraycopy(((CharTermAttribute) csq).buffer(), start, termBuffer, termLength, len);
+ } else if (csq instanceof CharBuffer && ((CharBuffer) csq).hasArray()) {
+ final CharBuffer cb = (CharBuffer) csq;
+ System.arraycopy(cb.array(), cb.arrayOffset() + cb.position() + start, termBuffer, termLength, len);
+ } else if (csq instanceof StringBuffer) {
+ ((StringBuffer) csq).getChars(start, end, termBuffer, termLength);
+ } else {
+ while (start < end)
+ termBuffer[termLength++] = csq.charAt(start++);
+ // no fall-through here, as termLength is updated!
+ return this;
+ }
+ termLength += len;
+ return this;
} else {
while (start < end)
termBuffer[termLength++] = csq.charAt(start++);
- // no fall-through here, as termLength is updated!
return this;
}
- termLength += end - start;
- return this;
}
- public CharTermAttribute append(char c) {
+ public final CharTermAttribute append(char c) {
resizeBuffer(termLength + 1)[termLength++] = c;
return this;
}
+ // *** For performance some convenience methods in addition to CSQ's ***
+
+ public final CharTermAttribute append(String s) {
+ if (s == null) // needed for Appendable compliance
+ return appendNull();
+ final int len = s.length();
+ s.getChars(0, len, resizeBuffer(termLength + len), termLength);
+ termLength += len;
+ return this;
+ }
+
+ public final CharTermAttribute append(StringBuilder s) {
+ if (s == null) // needed for Appendable compliance
+ return appendNull();
+ final int len = s.length();
+ s.getChars(0, len, resizeBuffer(termLength + len), termLength);
+ termLength += len;
+ return this;
+ }
+
+ public final CharTermAttribute append(CharTermAttribute ta) {
+ if (ta == null) // needed for Appendable compliance
+ return appendNull();
+ final int len = ta.length();
+ System.arraycopy(ta.buffer(), 0, resizeBuffer(termLength + len), termLength, len);
+ termLength += len;
+ return this;
+ }
+
+ private CharTermAttribute appendNull() {
+ resizeBuffer(termLength + 4);
+ termBuffer[termLength++] = 'n';
+ termBuffer[termLength++] = 'u';
+ termBuffer[termLength++] = 'l';
+ termBuffer[termLength++] = 'l';
+ return this;
+ }
+
// *** AttributeImpl ***
@Override
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java?rev=935521&r1=935520&r2=935521&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java Mon Apr 19 10:45:25 2010
@@ -22,6 +22,7 @@ import java.nio.CharBuffer;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Pattern;
+import java.util.Random;
public class TestCharTermAttributeImpl extends LuceneTestCase {
@@ -157,21 +158,155 @@ public class TestCharTermAttributeImpl e
assertEquals("12345678", t.toString());
t.append('9');
assertEquals("123456789", t.toString());
- t.append("0");
+ t.append((CharSequence) "0");
assertEquals("1234567890", t.toString());
- t.append("0123456789", 1, 3);
+ t.append((CharSequence) "0123456789", 1, 3);
assertEquals("123456789012", t.toString());
- t.append(CharBuffer.wrap("0123456789".toCharArray()), 3, 5);
+ t.append((CharSequence) CharBuffer.wrap("0123456789".toCharArray()), 3, 5);
assertEquals("12345678901234", t.toString());
- t.append(t);
+ t.append((CharSequence) t);
assertEquals("1234567890123412345678901234", t.toString());
- t.append(new StringBuilder("0123456789"), 5, 7);
+ t.append((CharSequence) new StringBuilder("0123456789"), 5, 7);
assertEquals("123456789012341234567890123456", t.toString());
- t.append(new StringBuffer(t));
+ t.append((CharSequence) new StringBuffer(t));
assertEquals("123456789012341234567890123456123456789012341234567890123456", t.toString());
// very weird, to test if a subSlice is wrapped correctly :)
- t.setEmpty().append(CharBuffer.wrap("0123456789".toCharArray(), 3, 5) /* "34" */, 1, 2);
+ CharBuffer buf = CharBuffer.wrap("0123456789".toCharArray(), 3, 5);
+ assertEquals("34567", buf.toString());
+ t.setEmpty().append((CharSequence) buf, 1, 2);
assertEquals("4", t.toString());
+ CharTermAttribute t2 = new CharTermAttributeImpl();
+ t2.append("test");
+ t.append((CharSequence) t2);
+ assertEquals("4test", t.toString());
+ t.append((CharSequence) t2, 1, 2);
+ assertEquals("4teste", t.toString());
+
+ try {
+ t.append((CharSequence) t2, 1, 5);
+ fail("Should throw IndexOutOfBoundsException");
+ } catch(IndexOutOfBoundsException iobe) {
+ }
+
+ try {
+ t.append((CharSequence) t2, 1, 0);
+ fail("Should throw IndexOutOfBoundsException");
+ } catch(IndexOutOfBoundsException iobe) {
+ }
+
+ t.append((CharSequence) null);
+ assertEquals("4testenull", t.toString());
+ }
+
+ public void testAppendableInterfaceWithLongSequences() {
+ CharTermAttributeImpl t = new CharTermAttributeImpl();
+ t.append((CharSequence) "01234567890123456789012345678901234567890123456789");
+ t.append((CharSequence) CharBuffer.wrap("01234567890123456789012345678901234567890123456789".toCharArray()), 3, 50);
+ assertEquals("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", t.toString());
+ t.setEmpty().append((CharSequence) new StringBuilder("01234567890123456789"), 5, 17);
+ assertEquals((CharSequence) "567890123456", t.toString());
+ t.append(new StringBuffer(t));
+ assertEquals((CharSequence) "567890123456567890123456", t.toString());
+ // very weird, to test if a subSlice is wrapped correctly :)
+ CharBuffer buf = CharBuffer.wrap("012345678901234567890123456789".toCharArray(), 3, 15);
+ assertEquals("345678901234567", buf.toString());
+ t.setEmpty().append(buf, 1, 14);
+ assertEquals("4567890123456", t.toString());
+
+ // finally use a completely custom CharSequence that is not caught by the instanceof checks
+ final String longTestString = "012345678901234567890123456789";
+ t.append(new CharSequence() {
+ public char charAt(int i) { return longTestString.charAt(i); }
+ public int length() { return longTestString.length(); }
+ public CharSequence subSequence(int start, int end) { return longTestString.subSequence(start, end); }
+ public String toString() { return longTestString; }
+ });
+ assertEquals("4567890123456"+longTestString, t.toString());
+ }
+
+ public void testNonCharSequenceAppend() {
+ CharTermAttributeImpl t = new CharTermAttributeImpl();
+ t.append("0123456789");
+ t.append("0123456789");
+ assertEquals("01234567890123456789", t.toString());
+ t.append(new StringBuilder("0123456789"));
+ assertEquals("012345678901234567890123456789", t.toString());
+ CharTermAttribute t2 = new CharTermAttributeImpl();
+ t2.append("test");
+ t.append(t2);
+ assertEquals("012345678901234567890123456789test", t.toString());
+ t.append((String) null);
+ t.append((StringBuilder) null);
+ t.append((CharTermAttribute) null);
+ assertEquals("012345678901234567890123456789testnullnullnull", t.toString());
+ }
+
+ public void testExceptions() {
+ CharTermAttributeImpl t = new CharTermAttributeImpl();
+ t.append("test");
+ assertEquals("test", t.toString());
+
+ try {
+ t.charAt(-1);
+ fail("Should throw IndexOutOfBoundsException");
+ } catch(IndexOutOfBoundsException iobe) {
+ }
+
+ try {
+ t.charAt(4);
+ fail("Should throw IndexOutOfBoundsException");
+ } catch(IndexOutOfBoundsException iobe) {
+ }
+
+ try {
+ t.subSequence(0, 5);
+ fail("Should throw IndexOutOfBoundsException");
+ } catch(IndexOutOfBoundsException iobe) {
+ }
+
+ try {
+ t.subSequence(5, 0);
+ fail("Should throw IndexOutOfBoundsException");
+ } catch(IndexOutOfBoundsException iobe) {
+ }
+ }
+
+ /*
+
+ // test speed of the dynamic instanceof checks in append(CharSequence),
+ // to find the best max length for the generic while (start<end) loop:
+ public void testAppendPerf() {
+ CharTermAttributeImpl t = new CharTermAttributeImpl();
+ final int count = 32;
+ CharSequence[] csq = new CharSequence[count * 6];
+ final StringBuilder sb = new StringBuilder();
+ for (int i=0,j=0; i<count; i++) {
+ sb.append(i%10);
+ final String testString = sb.toString();
+ CharTermAttribute cta = new CharTermAttributeImpl();
+ cta.append(testString);
+ csq[j++] = cta;
+ csq[j++] = testString;
+ csq[j++] = new StringBuilder(sb);
+ csq[j++] = new StringBuffer(sb);
+ csq[j++] = CharBuffer.wrap(testString.toCharArray());
+ csq[j++] = new CharSequence() {
+ public char charAt(int i) { return testString.charAt(i); }
+ public int length() { return testString.length(); }
+ public CharSequence subSequence(int start, int end) { return testString.subSequence(start, end); }
+ public String toString() { return testString; }
+ };
+ }
+
+ Random rnd = newRandom();
+ long startTime = System.currentTimeMillis();
+ for (int i=0; i<100000000; i++) {
+ t.setEmpty().append(csq[rnd.nextInt(csq.length)]);
+ }
+ long endTime = System.currentTimeMillis();
+ System.out.println("Time: " + (endTime-startTime)/1000.0 + " s");
}
+ */
+
}