You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2009/09/18 17:36:24 UTC

svn commit: r816673 - in /lucene/java/trunk/src: java/org/apache/lucene/analysis/TokenStream.java test/org/apache/lucene/analysis/TestTokenStreamBWComp.java

Author: uschindler
Date: Fri Sep 18 15:36:24 2009
New Revision: 816673

URL: http://svn.apache.org/viewvc?rev=816673&view=rev
Log:
LUCENE-1919: Fix analysis back compat break. Thanks to Robert Muir for the testcases, and Yonik and Mark Miller for testing!

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java?rev=816673&r1=816672&r2=816673&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java Fri Sep 18 15:36:24 2009
@@ -29,6 +29,7 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Payload;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeSource;
@@ -377,10 +378,7 @@
       return incrementToken() ? tokenWrapper.delegate : null;
     } else {
       assert supportedMethods.hasNext;
-      final Token token = next();
-      if (token == null) return null;
-      tokenWrapper.delegate = token;
-      return token;
+      return next();
     }
   }
 
@@ -396,15 +394,24 @@
     if (tokenWrapper == null)
       throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API.");
     
+    final Token nextToken;
     if (supportedMethods.hasIncrementToken) {
-      return incrementToken() ? ((Token) tokenWrapper.delegate.clone()) : null;
+      final Token savedDelegate = tokenWrapper.delegate;
+      tokenWrapper.delegate = new Token();
+      nextToken = incrementToken() ? tokenWrapper.delegate : null;
+      tokenWrapper.delegate = savedDelegate;
     } else {
       assert supportedMethods.hasReusableNext;
-      final Token token = next(tokenWrapper.delegate);
-      if (token == null) return null;
-      tokenWrapper.delegate = token;
-      return (Token) token.clone();
+      nextToken = next(new Token());
     }
+    
+    if (nextToken != null) {
+      Payload p = nextToken.getPayload();
+      if (p != null) {
+        nextToken.setPayload((Payload) p.clone());
+      }
+    }
+    return nextToken;
   }
 
   /**

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java?rev=816673&r1=816672&r2=816673&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java Fri Sep 18 15:36:24 2009
@@ -27,8 +27,9 @@
 /** This class tests some special cases of backwards compatibility when using the new TokenStream API with old analyzers */
 public class TestTokenStreamBWComp extends LuceneTestCase {
 
-  private final String doc = "This is the new TokenStream api";
-  private final String[] stopwords = new String[] {"is", "the", "this"};
+  private static final String doc = "This is the new TokenStream api";
+  private static final String[] stopwords = new String[] {"is", "the", "this"};
+  private static final String[] results = new String[] {"new", "tokenstream", "api"};
 
   public static class POSToken extends Token {
     public static final int PROPERNOUN = 1;
@@ -190,14 +191,17 @@
     PayloadAttribute payloadAtt = (PayloadAttribute) stream.addAttribute(PayloadAttribute.class);
     TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
     
+    int i=0;
     while (stream.incrementToken()) {
       String term = termAtt.term();
       Payload p = payloadAtt.getPayload();
       if (p != null && p.getData().length == 1 && p.getData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION) {
-        assertTrue("only TokenStream is a proper noun", "tokenstream".equals(term));
+        assertEquals("only TokenStream is a proper noun", "tokenstream", term);
       } else {
         assertFalse("all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)", "tokenstream".equals(term));
       }
+      assertEquals(results[i], term);
+      i++;
     }   
   }
 
@@ -205,14 +209,17 @@
     stream.reset();
     Token reusableToken = new Token();
     
+    int i=0;
     while ((reusableToken = stream.next(reusableToken)) != null) {
       String term = reusableToken.term();
       Payload p = reusableToken.getPayload();
       if (p != null && p.getData().length == 1 && p.getData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION) {
-        assertTrue("only TokenStream is a proper noun", "tokenstream".equals(term));
+        assertEquals("only TokenStream is a proper noun", "tokenstream", term);
       } else {
         assertFalse("all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)", "tokenstream".equals(term));
       }
+      assertEquals(results[i], term);
+      i++;
     }   
   }
 
@@ -220,14 +227,17 @@
     stream.reset();
     
     Token token;
+    int i=0;
     while ((token = stream.next()) != null) {
       String term = token.term();
       Payload p = token.getPayload();
       if (p != null && p.getData().length == 1 && p.getData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION) {
-        assertTrue("only TokenStream is a proper noun", "tokenstream".equals(term));
+        assertEquals("only TokenStream is a proper noun", "tokenstream", term);
       } else {
         assertFalse("all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)", "tokenstream".equals(term));
       }
+      assertEquals(results[i], term);
+      i++;
     }   
   }
   
@@ -245,7 +255,7 @@
         while (stream.incrementToken());
         fail("If only the new API is allowed, this should fail with an UOE");
       } catch (UnsupportedOperationException uoe) {
-        assertTrue((PartOfSpeechTaggingFilter.class.getName()+" does not implement incrementToken() which is needed for onlyUseNewAPI.").equals(uoe.getMessage()));
+        assertEquals((PartOfSpeechTaggingFilter.class.getName()+" does not implement incrementToken() which is needed for onlyUseNewAPI."),uoe.getMessage());
       }
 
       // this should pass, as all core token streams support the new API
@@ -255,17 +265,17 @@
       while (stream.incrementToken());
       
       // Test, if all attributes are implemented by their implementation, not Token/TokenWrapper
-      assertTrue("TermAttribute is implemented by TermAttributeImpl",
+      assertTrue("TermAttribute is not implemented by TermAttributeImpl",
         stream.addAttribute(TermAttribute.class) instanceof TermAttributeImpl);
-      assertTrue("OffsetAttribute is implemented by OffsetAttributeImpl",
+      assertTrue("OffsetAttribute is not implemented by OffsetAttributeImpl",
         stream.addAttribute(OffsetAttribute.class) instanceof OffsetAttributeImpl);
-      assertTrue("FlagsAttribute is implemented by FlagsAttributeImpl",
+      assertTrue("FlagsAttribute is not implemented by FlagsAttributeImpl",
         stream.addAttribute(FlagsAttribute.class) instanceof FlagsAttributeImpl);
-      assertTrue("PayloadAttribute is implemented by PayloadAttributeImpl",
+      assertTrue("PayloadAttribute is not implemented by PayloadAttributeImpl",
         stream.addAttribute(PayloadAttribute.class) instanceof PayloadAttributeImpl);
-      assertTrue("PositionIncrementAttribute is implemented by PositionIncrementAttributeImpl", 
+      assertTrue("PositionIncrementAttribute is not implemented by PositionIncrementAttributeImpl", 
         stream.addAttribute(PositionIncrementAttribute.class) instanceof PositionIncrementAttributeImpl);
-      assertTrue("TypeAttribute is implemented by TypeAttributeImpl",
+      assertTrue("TypeAttribute is not implemented by TypeAttributeImpl",
         stream.addAttribute(TypeAttribute.class) instanceof TypeAttributeImpl);
         
       // try to call old API, this should fail
@@ -275,14 +285,14 @@
         while ((reusableToken = stream.next(reusableToken)) != null);
         fail("If only the new API is allowed, this should fail with an UOE");
       } catch (UnsupportedOperationException uoe) {
-        assertTrue("This TokenStream only supports the new Attributes API.".equals(uoe.getMessage()));
+        assertEquals("This TokenStream only supports the new Attributes API.", uoe.getMessage());
       }
       try {
         stream.reset();
         while (stream.next() != null);
         fail("If only the new API is allowed, this should fail with an UOE");
       } catch (UnsupportedOperationException uoe) {
-        assertTrue("This TokenStream only supports the new Attributes API.".equals(uoe.getMessage()));
+        assertEquals("This TokenStream only supports the new Attributes API.", uoe.getMessage());
       }
       
       // Test if the wrapper API (onlyUseNewAPI==false) uses TokenWrapper
@@ -292,17 +302,17 @@
       // itsself.
       TokenStream.setOnlyUseNewAPI(false);
       stream = new WhitespaceTokenizer(new StringReader(doc));
-      assertTrue("TermAttribute is implemented by TokenWrapper",
+      assertTrue("TermAttribute is not implemented by TokenWrapper",
         stream.addAttribute(TermAttribute.class) instanceof TokenWrapper);
-      assertTrue("OffsetAttribute is implemented by TokenWrapper",
+      assertTrue("OffsetAttribute is not implemented by TokenWrapper",
         stream.addAttribute(OffsetAttribute.class) instanceof TokenWrapper);
-      assertTrue("FlagsAttribute is implemented by TokenWrapper",
+      assertTrue("FlagsAttribute is not implemented by TokenWrapper",
         stream.addAttribute(FlagsAttribute.class) instanceof TokenWrapper);
-      assertTrue("PayloadAttribute is implemented by TokenWrapper",
+      assertTrue("PayloadAttribute is not implemented by TokenWrapper",
         stream.addAttribute(PayloadAttribute.class) instanceof TokenWrapper);
-      assertTrue("PositionIncrementAttribute is implemented by TokenWrapper",
+      assertTrue("PositionIncrementAttribute is not implemented by TokenWrapper",
         stream.addAttribute(PositionIncrementAttribute.class) instanceof TokenWrapper);
-      assertTrue("TypeAttribute is implemented by TokenWrapper",
+      assertTrue("TypeAttribute is not implemented by TokenWrapper",
         stream.addAttribute(TypeAttribute.class) instanceof TokenWrapper);
       
     } finally {
@@ -321,8 +331,63 @@
       while (stream.incrementToken());
       fail("One TokenFilter does not override any of the required methods, so it should fail.");
     } catch (UnsupportedOperationException uoe) {
-      assertTrue(uoe.getMessage().endsWith("does not implement any of incrementToken(), next(Token), next()."));
+      assertTrue("invalid UOE message", uoe.getMessage().endsWith("does not implement any of incrementToken(), next(Token), next()."));
     }
   }
   
+  public void testMixedOldApiConsumer() throws Exception {
+    // WhitespaceTokenizer is using incrementToken() API:
+    TokenStream stream = new WhitespaceTokenizer(new StringReader("foo bar moo maeh"));
+    
+    Token foo = new Token();
+    foo = stream.next(foo);
+    Token bar = stream.next();
+    assertEquals("foo", foo.term());
+    assertEquals("bar", bar.term());
+    
+    Token moo = stream.next(foo);
+    assertEquals("moo", moo.term());
+    assertEquals("private 'bar' term should still be valid", "bar", bar.term());
+    
+    // and now we also use incrementToken()... (very bad, but should work)
+    TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
+    assertTrue(stream.incrementToken());
+    assertEquals("maeh", termAtt.term());    
+    assertEquals("private 'bar' term should still be valid", "bar", bar.term());    
+  }
+  
+  /*
+   * old api that cycles thru foo, bar, meh
+   */
+  private class RoundRobinOldAPI extends TokenStream {
+    int count = 0;
+    String terms[] = { "foo", "bar", "meh" };
+
+    public Token next(Token reusableToken) throws IOException {
+      reusableToken.setTermBuffer(terms[count % terms.length]);
+      count++;
+      return reusableToken;
+    }
+  }
+  
+  public void testMixedOldApiConsumer2() throws Exception {
+    // RoundRobinOldAPI is using TokenStream(next)
+    TokenStream stream = new RoundRobinOldAPI();
+    TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
+    
+    assertTrue(stream.incrementToken());
+    Token bar = stream.next();
+    assertEquals("foo", termAtt.term());
+    assertEquals("bar", bar.term());
+
+    assertTrue(stream.incrementToken());
+    assertEquals("meh", termAtt.term());
+    assertEquals("private 'bar' term should still be valid", "bar", bar.term());
+
+    Token foo = stream.next();
+    assertEquals("the term attribute should still be the same", "meh", termAtt.term());
+    assertEquals("foo", foo.term());
+    assertEquals("private 'bar' term should still be valid", "bar", bar.term());
+  }
+  
 }