You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/04/09 13:50:16 UTC
svn commit: r932369 - in /lucene/dev/trunk/lucene: ./
backwards/src/test/org/apache/lucene/analysis/
src/java/org/apache/lucene/analysis/
src/java/org/apache/lucene/analysis/tokenattributes/
src/test/org/apache/lucene/analysis/
Author: uschindler
Date: Fri Apr 9 11:50:16 2010
New Revision: 932369
URL: http://svn.apache.org/viewvc?rev=932369&view=rev
Log:
LUCENE-2302: Fix toString() issues with Token, add javadocs explaining break, and add missing CHANGES.txt
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Apr 9 11:50:16 2010
@@ -94,6 +94,13 @@ Changes in backwards compatibility polic
FSDirectory.FSIndexInput. Anyone extending this class will have to
fix their code on upgrading. (Earwin Burrfoot via Mike McCandless)
+* LUCENE-2302: The new interface for term attributes, CharTermAttribute,
+ now implements CharSequence. This requires the toString() methods of
+ CharTermAttribute, deprecated TermAttribute, and Token to return only
+ the term text and no other attribute contents.
+ TODO: Point to new attribute inspection API coming with LUCENE-2374.
+ (Uwe Schindler, Robert Muir)
+
Changes in runtime behavior
* LUCENE-1923: Made IndexReader.toString() produce something
@@ -186,6 +193,17 @@ API Changes
deleted docs (getDeletedDocs), providing a new Bits interface to
directly query by doc ID.
+* LUCENE-2302: Deprecated TermAttribute and replaced it with a new
+ CharTermAttribute. The change is backwards compatible, so
+ mixed new/old TokenStreams all work on the same char[] buffer
+ independent of which interface they use. CharTermAttribute
+ has shorter method names and implements CharSequence and
+ Appendable. This allows usage like Java's StringBuilder in
+ addition to direct char[] access. Also terms can directly be
+ used in places where CharSequence is allowed (e.g. regular
+ expressions).
+ (Uwe Schindler, Robert Muir)
+
Bug fixes
* LUCENE-2119: Don't throw NegativeArraySizeException if you pass
Modified: lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java Fri Apr 9 11:50:16 2010
@@ -31,6 +31,7 @@ public class TestToken extends LuceneTes
super(name);
}
+ /* toString changed in 3.1:
public void testCtor() throws Exception {
Token t = new Token();
char[] content = "hello".toCharArray();
@@ -60,6 +61,7 @@ public class TestToken extends LuceneTes
assertEquals("(hello,6,22,type=junk)", t.toString());
assertEquals(0, t.getFlags());
}
+ */
public void testResize() {
Token t = new Token();
@@ -139,6 +141,7 @@ public class TestToken extends LuceneTes
assertEquals(20000, t.termLength());
}
+ /* toString changed in 3.1:
public void testToString() throws Exception {
char[] b = {'a', 'l', 'o', 'h', 'a'};
Token t = new Token("", 0, 5);
@@ -148,6 +151,7 @@ public class TestToken extends LuceneTes
t.setTermBuffer("hi there");
assertEquals("(hi there,0,5)", t.toString());
}
+ */
public void testTermBufferEquals() throws Exception {
Token t1a = new Token();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java Fri Apr 9 11:50:16 2010
@@ -112,10 +112,14 @@ import org.apache.lucene.util.AttributeI
<li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
</ul>
</p>
-
+ <p>
+ <b>Please note:</b> With Lucene 3.1, the <code>{@linkplain #toString toString()}</code> method had to be changed to match the
+ {@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
+ This method now returns only the term text, without any additional information.
+ </p>
@see org.apache.lucene.index.Payload
*/
-// TODO: change superclass to CharTermAttribute in 4.0!
+// TODO: change superclass to CharTermAttribute in 4.0! Maybe deprecate the whole class?
public class Token extends TermAttributeImpl
implements TypeAttribute, PositionIncrementAttribute,
FlagsAttribute, OffsetAttribute, PayloadAttribute {
@@ -349,19 +353,6 @@ public class Token extends TermAttribute
this.payload = payload;
}
- @Override
- public String toString() {
- final StringBuilder sb = new StringBuilder();
- sb.append('(').append(super.toString()).append(',')
- .append(startOffset).append(',').append(endOffset);
- if (!"word".equals(type))
- sb.append(",type=").append(type);
- if (positionIncrement != 1)
- sb.append(",posIncr=").append(positionIncrement);
- sb.append(')');
- return sb.toString();
- }
-
/** Resets the term text, payload, flags, and positionIncrement,
* startOffset, endOffset and token type to default.
*/
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java Fri Apr 9 11:50:16 2010
@@ -224,6 +224,15 @@ public class CharTermAttributeImpl exten
return false;
}
+ /**
+ * Returns solely the term text as specified by the
+ * {@link CharSequence} interface.
+ * <p>The behavior of this method changed in Lucene 3.1;
+ * previously it returned a String representation of the whole
+ * term with all attributes.
+ * This affects especially the
+ * {@link org.apache.lucene.analysis.Token} subclass.
+ */
@Override
public String toString() {
return new String(termBuffer, 0, termLength);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java Fri Apr 9 11:50:16 2010
@@ -36,6 +36,8 @@ public class TestToken extends LuceneTes
char[] content = "hello".toCharArray();
t.setTermBuffer(content, 0, content.length);
assertNotSame(t.termBuffer(), content);
+ assertEquals(0, t.startOffset());
+ assertEquals(0, t.endOffset());
assertEquals("hello", t.term());
assertEquals("word", t.type());
assertEquals(0, t.getFlags());
@@ -43,20 +45,28 @@ public class TestToken extends LuceneTes
t = new Token(6, 22);
t.setTermBuffer(content, 0, content.length);
assertEquals("hello", t.term());
- assertEquals("(hello,6,22)", t.toString());
+ assertEquals("hello", t.toString());
+ assertEquals(6, t.startOffset());
+ assertEquals(22, t.endOffset());
assertEquals("word", t.type());
assertEquals(0, t.getFlags());
t = new Token(6, 22, 7);
t.setTermBuffer(content, 0, content.length);
assertEquals("hello", t.term());
- assertEquals("(hello,6,22)", t.toString());
+ assertEquals("hello", t.toString());
+ assertEquals(6, t.startOffset());
+ assertEquals(22, t.endOffset());
+ assertEquals("word", t.type());
assertEquals(7, t.getFlags());
t = new Token(6, 22, "junk");
t.setTermBuffer(content, 0, content.length);
assertEquals("hello", t.term());
- assertEquals("(hello,6,22,type=junk)", t.toString());
+ assertEquals("hello", t.toString());
+ assertEquals(6, t.startOffset());
+ assertEquals(22, t.endOffset());
+ assertEquals("junk", t.type());
assertEquals(0, t.getFlags());
}
@@ -142,10 +152,10 @@ public class TestToken extends LuceneTes
char[] b = {'a', 'l', 'o', 'h', 'a'};
Token t = new Token("", 0, 5);
t.setTermBuffer(b, 0, 5);
- assertEquals("(aloha,0,5)", t.toString());
+ assertEquals("aloha", t.toString());
t.setTermBuffer("hi there");
- assertEquals("(hi there,0,5)", t.toString());
+ assertEquals("hi there", t.toString());
}
public void testTermBufferEquals() throws Exception {