You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/04/09 13:50:16 UTC

svn commit: r932369 - in /lucene/dev/trunk/lucene: ./ backwards/src/test/org/apache/lucene/analysis/ src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/analysis/tokenattributes/ src/test/org/apache/lucene/analysis/

Author: uschindler
Date: Fri Apr  9 11:50:16 2010
New Revision: 932369

URL: http://svn.apache.org/viewvc?rev=932369&view=rev
Log:
LUCENE-2302: Fix toString() issues with Token, add javadocs explaining break, and add missing CHANGES.txt

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Apr  9 11:50:16 2010
@@ -94,6 +94,13 @@ Changes in backwards compatibility polic
   FSDirectory.FSIndexInput. Anyone extending this class will have to
   fix their code on upgrading. (Earwin Burrfoot via Mike McCandless)
 
+* LUCENE-2302: The new interface for term attributes, CharTermAttribute,
+  now implements CharSequence. This requires the toString() methods of
+  CharTermAttribute, deprecated TermAttribute, and Token to return only
+  the term text and no other attribute contents.
+  TODO: Point to new attribute inspection API coming with LUCENE-2374.
+  (Uwe Schindler, Robert Muir)
+
 Changes in runtime behavior
 
 * LUCENE-1923: Made IndexReader.toString() produce something
@@ -186,6 +193,17 @@ API Changes
   deleted docs (getDeletedDocs), providing a new Bits interface to
   directly query by doc ID.
 
+* LUCENE-2302: Deprecated TermAttribute and replaced it with the new
+  CharTermAttribute. The change is backwards compatible, so
+  mixed new/old TokenStreams all work on the same char[] buffer
+  independent of which interface they use. CharTermAttribute
+  has shorter method names and implements CharSequence and
+  Appendable. This allows usage like Java's StringBuilder in
+  addition to direct char[] access. Also terms can directly be
+  used in places where CharSequence is allowed (e.g. regular
+  expressions).
+  (Uwe Schindler, Robert Muir)
+
 Bug fixes
 
 * LUCENE-2119: Don't throw NegativeArraySizeException if you pass

Modified: lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java Fri Apr  9 11:50:16 2010
@@ -31,6 +31,7 @@ public class TestToken extends LuceneTes
     super(name);
   }
 
+  /* toString changed in 3.1:
   public void testCtor() throws Exception {
     Token t = new Token();
     char[] content = "hello".toCharArray();
@@ -60,6 +61,7 @@ public class TestToken extends LuceneTes
     assertEquals("(hello,6,22,type=junk)", t.toString());
     assertEquals(0, t.getFlags());
   }
+  */
 
   public void testResize() {
     Token t = new Token();
@@ -139,6 +141,7 @@ public class TestToken extends LuceneTes
     assertEquals(20000, t.termLength());
   }
 
+  /* toString changed in 3.1:
   public void testToString() throws Exception {
     char[] b = {'a', 'l', 'o', 'h', 'a'};
     Token t = new Token("", 0, 5);
@@ -148,6 +151,7 @@ public class TestToken extends LuceneTes
     t.setTermBuffer("hi there");
     assertEquals("(hi there,0,5)", t.toString());
   }
+  */
 
   public void testTermBufferEquals() throws Exception {
     Token t1a = new Token();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java Fri Apr  9 11:50:16 2010
@@ -112,10 +112,14 @@ import org.apache.lucene.util.AttributeI
   <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
   </ul>
   </p>
-
+  <p>
+  <b>Please note:</b> With Lucene 3.1, the <code>{@linkplain #toString toString()}</code> method had to be changed to match the
+  {@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
+  This method now returns only the term text, without any additional attribute information.
+  </p>
   @see org.apache.lucene.index.Payload
 */
-// TODO: change superclass to CharTermAttribute in 4.0!
+// TODO: change superclass to CharTermAttribute in 4.0! Maybe deprecate the whole class?
 public class Token extends TermAttributeImpl 
                    implements TypeAttribute, PositionIncrementAttribute,
                               FlagsAttribute, OffsetAttribute, PayloadAttribute {
@@ -349,19 +353,6 @@ public class Token extends TermAttribute
     this.payload = payload;
   }
   
-  @Override
-  public String toString() {
-    final StringBuilder sb = new StringBuilder();
-    sb.append('(').append(super.toString()).append(',')
-      .append(startOffset).append(',').append(endOffset);
-    if (!"word".equals(type))
-      sb.append(",type=").append(type);
-    if (positionIncrement != 1)
-      sb.append(",posIncr=").append(positionIncrement);
-    sb.append(')');
-    return sb.toString();
-  }
-
   /** Resets the term text, payload, flags, and positionIncrement,
    * startOffset, endOffset and token type to default.
    */

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java Fri Apr  9 11:50:16 2010
@@ -224,6 +224,15 @@ public class CharTermAttributeImpl exten
     return false;
   }
 
+  /** 
+   * Returns solely the term text as specified by the
+   * {@link CharSequence} interface.
+   * <p>The behavior of this method changed in Lucene 3.1:
+   * previously it returned a String representation of the whole
+   * term including all attributes.
+   * This affects especially the
+   * {@link org.apache.lucene.analysis.Token} subclass.
+   */
   @Override
   public String toString() {
     return new String(termBuffer, 0, termLength);

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java?rev=932369&r1=932368&r2=932369&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestToken.java Fri Apr  9 11:50:16 2010
@@ -36,6 +36,8 @@ public class TestToken extends LuceneTes
     char[] content = "hello".toCharArray();
     t.setTermBuffer(content, 0, content.length);
     assertNotSame(t.termBuffer(), content);
+    assertEquals(0, t.startOffset());
+    assertEquals(0, t.endOffset());
     assertEquals("hello", t.term());
     assertEquals("word", t.type());
     assertEquals(0, t.getFlags());
@@ -43,20 +45,28 @@ public class TestToken extends LuceneTes
     t = new Token(6, 22);
     t.setTermBuffer(content, 0, content.length);
     assertEquals("hello", t.term());
-    assertEquals("(hello,6,22)", t.toString());
+    assertEquals("hello", t.toString());
+    assertEquals(6, t.startOffset());
+    assertEquals(22, t.endOffset());
     assertEquals("word", t.type());
     assertEquals(0, t.getFlags());
 
     t = new Token(6, 22, 7);
     t.setTermBuffer(content, 0, content.length);
     assertEquals("hello", t.term());
-    assertEquals("(hello,6,22)", t.toString());
+    assertEquals("hello", t.toString());
+    assertEquals(6, t.startOffset());
+    assertEquals(22, t.endOffset());
+    assertEquals("word", t.type());
     assertEquals(7, t.getFlags());
 
     t = new Token(6, 22, "junk");
     t.setTermBuffer(content, 0, content.length);
     assertEquals("hello", t.term());
-    assertEquals("(hello,6,22,type=junk)", t.toString());
+    assertEquals("hello", t.toString());
+    assertEquals(6, t.startOffset());
+    assertEquals(22, t.endOffset());
+    assertEquals("junk", t.type());
     assertEquals(0, t.getFlags());
   }
 
@@ -142,10 +152,10 @@ public class TestToken extends LuceneTes
     char[] b = {'a', 'l', 'o', 'h', 'a'};
     Token t = new Token("", 0, 5);
     t.setTermBuffer(b, 0, 5);
-    assertEquals("(aloha,0,5)", t.toString());
+    assertEquals("aloha", t.toString());
 
     t.setTermBuffer("hi there");
-    assertEquals("(hi there,0,5)", t.toString());
+    assertEquals("hi there", t.toString());
   }
 
   public void testTermBufferEquals() throws Exception {