You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ma...@apache.org on 2009/12/21 15:55:09 UTC
svn commit: r892841 [2/2] - in /lucene/solr/branches/cloud: ./ contrib/dataimporthandler/ contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/ contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ src...

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestKeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestKeepWordFilter.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestKeepWordFilter.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestKeepWordFilter.java Mon Dec 21 14:55:08 2009
@@ -17,13 +17,14 @@
 
 package org.apache.solr.analysis;
 
+import java.io.StringReader;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
 
 
 /**
@@ -37,7 +38,7 @@
     words.add( "aaa" );
     words.add( "bbb" );
     
-    List<Token> input = tokens( "aaa BBB ccc ddd EEE" );
+    String input = "aaa BBB ccc ddd EEE";
     Map<String,String> args = new HashMap<String, String>();
 
     
@@ -47,18 +48,28 @@
     factory.init( args );
     factory.inform( solrConfig.getResourceLoader() );
     factory.setWords( words );
+    assertTrue(factory.isIgnoreCase());
+    TokenStream stream = factory.create(new WhitespaceTokenizer(new StringReader(input)));
+    assertTokenStreamContents(stream, new String[] { "aaa", "BBB" });
     
-    List<Token> expect = tokens( "aaa BBB" );
-    List<Token> real = getTokens(factory.create( new IterTokenStream(input) ));
-    assertTokEqual( expect, real );
+    // Test Stopwords (ignoreCase via the setter instead)
+    factory = new KeepWordFilterFactory();
+    args = new HashMap<String, String>();
+    factory.init( args );
+    factory.inform( solrConfig.getResourceLoader() );
+    factory.setIgnoreCase(true);
+    factory.setWords( words );
+    assertTrue(factory.isIgnoreCase());
+    stream = factory.create(new WhitespaceTokenizer(new StringReader(input)));
+    assertTokenStreamContents(stream, new String[] { "aaa", "BBB" });
     
     // Now force case
+    args = new HashMap<String, String>();
     args.put( "ignoreCase", "false" );
     factory.init( args );
     factory.inform( solrConfig.getResourceLoader() );
-    
-    expect = tokens( "aaa" );
-    real = getTokens(factory.create( new IterTokenStream(input) ));
-    assertTokEqual( expect, real );
+    assertFalse(factory.isIgnoreCase());
+    stream = factory.create(new WhitespaceTokenizer(new StringReader(input)));
+    assertTokenStreamContents(stream, new String[] { "aaa" });
   }
 }

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java Mon Dec 21 14:55:08 2009
@@ -1,37 +1,27 @@
 package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.junit.Assert;
 import org.junit.Test;
 
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 /**
- * @version $Id$
  * @since solr 1.4
  */
-public class TestMultiWordSynonyms {
+public class TestMultiWordSynonyms extends BaseTokenTestCase {
 
   @Test
-  public void testMultiWordSynonmys() throws IOException {
+  public void testMultiWordSynonyms() throws IOException {
     List<String> rules = new ArrayList<String>();
     rules.add("a b c,d");
     SynonymMap synMap = new SynonymMap(true);
     SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
 
     SynonymFilter ts = new SynonymFilter(new WhitespaceTokenizer(new StringReader("a e")), synMap);
-    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
-
-    ts.reset();
-    List<String> tokens = new ArrayList<String>();
-    while (ts.incrementToken()) tokens.add(termAtt.term());
-
     // This fails because ["e","e"] is the value of the token stream
-    Assert.assertEquals(Arrays.asList("a", "e"), tokens);
+    assertTokenStreamContents(ts, new String[] { "a", "e" });
   }
 }

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternReplaceCharFilter.java Mon Dec 21 14:55:08 2009
@@ -19,6 +19,8 @@
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.lucene.analysis.CharReader;
 import org.apache.lucene.analysis.CharStream;
@@ -37,20 +39,33 @@
   // this is test.
   public void testNothingChange() throws IOException {
     final String BLOCK = "this is test.";
-    CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1$2$3",
+    PatternReplaceCharFilterFactory factory = new PatternReplaceCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("pattern", "(aa)\\s+(bb)\\s+(cc)");
+    args.put("replacement", "$1$2$3");
+    factory.init(args);
+    CharStream cs = factory.create(
           CharReader.get( new StringReader( BLOCK ) ) );
     TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokEqualOff( tokens( "this,1,0,4 is,1,5,7 test.,1,8,13" ), getTokens( ts ) );
+    assertTokenStreamContents(ts,
+        new String[] { "this", "is", "test." },
+        new int[] { 0, 5, 8 },
+        new int[] { 4, 7, 13 },
+        new int[] { 1, 1, 1 });
   }
   
   // 012345678
   // aa bb cc
   public void testReplaceByEmpty() throws IOException {
     final String BLOCK = "aa bb cc";
-    CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "",
+    PatternReplaceCharFilterFactory factory = new PatternReplaceCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("pattern", "(aa)\\s+(bb)\\s+(cc)");
+    factory.init(args);
+    CharStream cs = factory.create(
           CharReader.get( new StringReader( BLOCK ) ) );
     TokenStream ts = new WhitespaceTokenizer( cs );
-    assertEquals( 0, getTokens( ts ).size() );
+    assertFalse(ts.incrementToken());
   }
   
   // 012345678
@@ -58,10 +73,19 @@
   // aa#bb#cc
   public void test1block1matchSameLength() throws IOException {
     final String BLOCK = "aa bb cc";
-    CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1#$2#$3",
+    PatternReplaceCharFilterFactory factory = new PatternReplaceCharFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("pattern", "(aa)\\s+(bb)\\s+(cc)");
+    args.put("replacement", "$1#$2#$3");
+    factory.init(args);
+    CharStream cs = factory.create(
           CharReader.get( new StringReader( BLOCK ) ) );
     TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokEqualOff( tokens( "aa#bb#cc,1,0,8" ), getTokens( ts ) );
+    assertTokenStreamContents(ts,
+        new String[] { "aa#bb#cc" },
+        new int[] { 0 },
+        new int[] { 8 },
+        new int[] { 1 });
   }
 
   //           11111
@@ -73,7 +97,11 @@
     CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1##$2###$3",
           CharReader.get( new StringReader( BLOCK ) ) );
     TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokEqualOff( tokens( "aa##bb###cc,1,0,8 dd,1,9,11" ), getTokens( ts ) );
+    assertTokenStreamContents(ts,
+        new String[] { "aa##bb###cc", "dd" },
+        new int[] { 0, 9 },
+        new int[] { 8, 11 },
+        new int[] { 1, 1 });
   }
 
   // 01234567
@@ -84,7 +112,11 @@
     CharStream cs = new PatternReplaceCharFilter( "a", "aa",
           CharReader.get( new StringReader( BLOCK ) ) );
     TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokEqualOff( tokens( "aa,1,1,2 aa,1,4,5" ), getTokens( ts ) );
+    assertTokenStreamContents(ts,
+        new String[] { "aa", "aa" },
+        new int[] { 1, 4 },
+        new int[] { 2, 5 },
+        new int[] { 1, 1 });
   }
 
   //           11111
@@ -96,7 +128,11 @@
     CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1#$2",
           CharReader.get( new StringReader( BLOCK ) ) );
     TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokEqualOff( tokens( "aa#bb,1,0,11 dd,1,12,14" ), getTokens( ts ) );
+    assertTokenStreamContents(ts,
+        new String[] { "aa#bb", "dd" },
+        new int[] { 0, 12 },
+        new int[] { 11, 14 },
+        new int[] { 1, 1 });
   }
 
   //           111111111122222222223333
@@ -108,8 +144,11 @@
     CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)\\s+(cc)", "$1  $2  $3",
           CharReader.get( new StringReader( BLOCK ) ) );
     TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokEqualOff( tokens( "aa,1,2,4 bb,1,6,8 cc,1,9,10 ---,1,11,14 aa,1,15,17 bb,1,18,20 aa,1,21,23 bb,1,25,27 cc,1,29,33" ),
-        getTokens( ts ) );
+    assertTokenStreamContents(ts,
+        new String[] { "aa", "bb", "cc", "---", "aa", "bb", "aa", "bb", "cc" },
+        new int[] { 2, 6, 9, 11, 15, 18, 21, 25, 29 },
+        new int[] { 4, 8, 10, 14, 17, 20, 23, 27, 33 },
+        new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1 });
   }
 
   //           11111111112222222222333333333
@@ -121,8 +160,11 @@
     CharStream cs = new PatternReplaceCharFilter( "(aa)\\s+(bb)", "$1##$2", ".",
           CharReader.get( new StringReader( BLOCK ) ) );
     TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokEqualOff( tokens( "aa##bb,1,2,7 cc,1,8,10 ---,1,11,14 aa##bb,1,15,20 aa.,1,21,24 bb,1,25,27 aa##bb,1,28,35 cc,1,36,38" ),
-        getTokens( ts ) );
+    assertTokenStreamContents(ts,
+        new String[] { "aa##bb", "cc", "---", "aa##bb", "aa.", "bb", "aa##bb", "cc" },
+        new int[] { 2, 8, 11, 15, 21, 25, 28, 36 },
+        new int[] { 7, 10, 14, 20, 24, 27, 35, 38 },
+        new int[] { 1, 1, 1, 1, 1, 1, 1, 1 });
   }
 
   //           11111111112222222222333333333
@@ -136,7 +178,10 @@
     cs = new PatternReplaceCharFilter( "bb", "b", ".", cs );
     cs = new PatternReplaceCharFilter( "ccc", "c", ".", cs );
     TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokEqualOff( tokens( "aa,1,1,2 b,1,3,5 -,1,6,7 c,1,8,11 .,1,12,13 ---,1,14,17 b,1,18,20 aa,1,21,22 .,1,23,24 c,1,25,28 c,1,29,32 b,1,33,35" ),
-        getTokens( ts ) );
+    assertTokenStreamContents(ts,
+        new String[] { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" },
+        new int[] { 1, 3, 6, 8, 12, 14, 18, 21, 23, 25, 29, 33 },
+        new int[] { 2, 5, 7, 11, 13, 17, 20, 22, 24, 28, 32, 35 },
+        new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
   }
 }

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternReplaceFilter.java Mon Dec 21 14:55:08 2009
@@ -17,7 +17,6 @@
 
 package org.apache.solr.analysis;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 
@@ -27,7 +26,7 @@
 /**
  * @version $Id:$
  */
-public class TestPatternReplaceFilter extends AnalysisTestCase {
+public class TestPatternReplaceFilter extends BaseTokenTestCase {
 
   public void testReplaceAll() throws Exception {
     String input = "aabfooaabfooabfoob ab caaaaaaaaab";
@@ -35,14 +34,8 @@
             (new WhitespaceTokenizer(new StringReader(input)),
                     Pattern.compile("a*b"),
                     "-", true);
-    Token token = ts.next();
-    assertEquals("-foo-foo-foo-", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("-", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("c-", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertNull(token);
+    assertTokenStreamContents(ts, 
+        new String[] { "-foo-foo-foo-", "-", "c-" });
   }
 
   public void testReplaceFirst() throws Exception {
@@ -51,14 +44,8 @@
             (new WhitespaceTokenizer(new StringReader(input)),
                     Pattern.compile("a*b"),
                     "-", false);
-    Token token = ts.next();
-    assertEquals("-fooaabfooabfoob", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("-", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("c-", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertNull(token);
+    assertTokenStreamContents(ts, 
+        new String[] { "-fooaabfooabfoob", "-", "c-" });
   }
 
   public void testStripFirst() throws Exception {
@@ -67,14 +54,8 @@
             (new WhitespaceTokenizer(new StringReader(input)),
                     Pattern.compile("a*b"),
                     null, false);
-    Token token = ts.next();
-    assertEquals("fooaabfooabfoob", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("c", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertNull(token);
+    assertTokenStreamContents(ts,
+        new String[] { "fooaabfooabfoob", "", "c" });
   }
 
   public void testStripAll() throws Exception {
@@ -83,14 +64,8 @@
             (new WhitespaceTokenizer(new StringReader(input)),
                     Pattern.compile("a*b"),
                     null, true);
-    Token token = ts.next();
-    assertEquals("foofoofoo", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("c", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertNull(token);
+    assertTokenStreamContents(ts,
+        new String[] { "foofoofoo", "", "c" });
   }
 
   public void testReplaceAllWithBackRef() throws Exception {
@@ -99,14 +74,8 @@
             (new WhitespaceTokenizer(new StringReader(input)),
                     Pattern.compile("(a*)b"),
                     "$1\\$", true);
-    Token token = ts.next();
-    assertEquals("aa$fooaa$fooa$foo$", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("a$", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertEquals("caaaaaaaaa$", new String(token.termBuffer(), 0, token.termLength()));
-    token = ts.next();
-    assertNull(token);
+    assertTokenStreamContents(ts,
+        new String[] { "aa$fooaa$fooa$foo$", "a$", "caaaaaaaaa$" });
   }
 
 }

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java Mon Dec 21 14:55:08 2009
@@ -17,6 +17,7 @@
 
 package org.apache.solr.analysis;
 
+import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -27,8 +28,8 @@
 import org.apache.lucene.analysis.CharStream;
 import org.apache.lucene.analysis.MappingCharFilter;
 import org.apache.lucene.analysis.NormalizeCharMap;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 
 public class TestPatternTokenizerFactory extends BaseTokenTestCase 
 {
@@ -57,7 +58,7 @@
       tokenizer.init( args );
       
       TokenStream stream = tokenizer.create( new StringReader( test[2] ) );
-      String out = TestHyphenatedWordsFilter.tsToString( stream );
+      String out = tsToString( stream );
       System.out.println( test[2] + " ==> " + out );
       
       assertEquals("pattern: "+test[1]+" with input: "+test[2], test[3], out );
@@ -93,20 +94,45 @@
     PatternTokenizerFactory tokFactory = new PatternTokenizerFactory();
     tokFactory.init( args );
     TokenStream stream = tokFactory.create( charStream );
-
-    List<Token> result = getTokens( stream );
-    List<Token> expect = tokens( "Günther,1,0,12 Günther,1,13,25 is,1,26,28 here,1,29,33" );
-    assertTokEqualOff( expect, result );
+    assertTokenStreamContents(stream,
+        new String[] { "Günther", "Günther", "is", "here" },
+        new int[] { 0, 13, 26, 29 },
+        new int[] { 12, 25, 28, 33 },
+        new int[] { 1, 1, 1, 1 });
     
-    charStream.reset();
+    charStream = new MappingCharFilter( normMap, CharReader.get( new StringReader( INPUT ) ) );
     args.put( PatternTokenizerFactory.PATTERN, "Günther" );
     args.put( PatternTokenizerFactory.GROUP, "0" );
     tokFactory = new PatternTokenizerFactory();
     tokFactory.init( args );
     stream = tokFactory.create( charStream );
+    assertTokenStreamContents(stream,
+        new String[] { "Günther", "Günther" },
+        new int[] { 0, 13 },
+        new int[] { 12, 25 },
+        new int[] { 1, 1 });
+  }
+  
+  /** 
+   * TODO: rewrite tests not to use string comparison.
+   * @deprecated only tests TermAttribute!
+   */
+  private static String tsToString(TokenStream in) throws IOException {
+    StringBuilder out = new StringBuilder();
+    TermAttribute termAtt = (TermAttribute) in.addAttribute(TermAttribute.class);
+    // extra safety to enforce, that the state is not preserved and also
+    // assign bogus values
+    in.clearAttributes();
+    termAtt.setTermBuffer("bogusTerm");
+    while (in.incrementToken()) {
+      if (out.length() > 0)
+        out.append(' ');
+      out.append(termAtt.term());
+      in.clearAttributes();
+      termAtt.setTermBuffer("bogusTerm");
+    }
 
-    result = getTokens( stream );
-    expect = tokens( "Günther,1,0,12 Günther,1,13,25" );
-    assertTokEqualOff( expect, result );
+    in.close();
+    return out.toString();
   }
 }

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPhoneticFilter.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPhoneticFilter.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestPhoneticFilter.java Mon Dec 21 14:55:08 2009
@@ -17,16 +17,14 @@
 
 package org.apache.solr.analysis;
 
-import java.util.ArrayList;
+import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.commons.codec.Encoder;
-import org.apache.commons.codec.language.DoubleMetaphone;
 import org.apache.commons.codec.language.Metaphone;
-import org.apache.commons.codec.language.RefinedSoundex;
-import org.apache.commons.codec.language.Soundex;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
 
 
 /**
@@ -61,50 +59,38 @@
     assertFalse( ff.inject );
   }
   
-  public void runner( Encoder enc, boolean inject ) throws Exception
-  {
-    String[] input = new String[] {
-       "aaa", "bbb", "ccc", "easgasg"
-    };
-
-    ArrayList<Token> stream = new ArrayList<Token>();
-    ArrayList<Token> output = new ArrayList<Token>();
-    for( String s : input ) {
-      stream.add( new Token( s, 0, s.length() ) );
-
-      // phonetic token is added first in the current impl
-      output.add( new Token( enc.encode(s).toString(), 0, s.length() ) );
-
-      // add the original if applicable
-      if( inject ) {
-        output.add( new Token( s, 0, s.length() ) );
-      }
-    }
-
-    // System.out.println("###stream="+stream);
-    // System.out.println("###output="+output);
-
-    PhoneticFilter filter = new PhoneticFilter( 
-        new IterTokenStream(stream.iterator()), enc, "text", inject );
-
-    Token got = new Token();
-    for( Token t : output ) {
-      got = filter.next(got);
-      // System.out.println("##### expect=" + t + " got="+got);
-      assertEquals( t.term(), got.term());
-    }
-    assertNull( filter.next() );  // no more tokens
+  public void testAlgorithms() throws Exception {
+    assertAlgorithm("Metaphone", "true", "aaa bbb ccc easgasg",
+        new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
+    assertAlgorithm("Metaphone", "false", "aaa bbb ccc easgasg",
+        new String[] { "A", "B", "KKK", "ESKS" });
+    
+    assertAlgorithm("DoubleMetaphone", "true", "aaa bbb ccc easgasg",
+        new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
+    assertAlgorithm("DoubleMetaphone", "false", "aaa bbb ccc easgasg",
+        new String[] { "A", "PP", "KK", "ASKS" });
+    
+    assertAlgorithm("Soundex", "true", "aaa bbb ccc easgasg",
+        new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
+    assertAlgorithm("Soundex", "false", "aaa bbb ccc easgasg",
+        new String[] { "A000", "B000", "C000", "E220" });
+    
+    assertAlgorithm("RefinedSoundex", "true", "aaa bbb ccc easgasg",
+        new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
+    assertAlgorithm("RefinedSoundex", "false", "aaa bbb ccc easgasg",
+        new String[] { "A0", "B1", "C3", "E034034" });
   }
   
-  public void testEncodes() throws Exception {
-    runner( new DoubleMetaphone(), true );
-    runner( new Metaphone(), true );
-    runner( new Soundex(), true );
-    runner( new RefinedSoundex(), true );
-
-    runner( new DoubleMetaphone(), false );
-    runner( new Metaphone(), false );
-    runner( new Soundex(), false );
-    runner( new RefinedSoundex(), false );
+  static void assertAlgorithm(String algName, String inject, String input,
+      String[] expected) throws Exception {
+    Tokenizer tokenizer = new WhitespaceTokenizer(
+        new StringReader(input));
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("encoder", algName);
+    args.put("inject", inject);
+    PhoneticFilterFactory factory = new PhoneticFilterFactory();
+    factory.init(args);
+    TokenStream stream = factory.create(tokenizer);
+    assertTokenStreamContents(stream, expected);
   }
 }

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java Mon Dec 21 14:55:08 2009
@@ -20,10 +20,14 @@
 import junit.framework.TestCase;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
 import java.util.Iterator;
 import java.util.Arrays;
 
-public class TestRemoveDuplicatesTokenFilter extends AnalysisTestCase {
+public class TestRemoveDuplicatesTokenFilter extends BaseTokenTestCase {
 
   public static Token tok(int pos, String t, int start, int end) {
     Token tok = new Token(t,start,end);
@@ -38,15 +42,27 @@
     throws Exception {
 
     final Iterator<Token> toks = Arrays.asList(tokens).iterator();
-    
-    final TokenStream ts = new RemoveDuplicatesTokenFilter
+    RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory();
+    final TokenStream ts = factory.create
       (new TokenStream() {
-          public Token next() { return toks.hasNext() ? toks.next() : null; }
+          TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+          OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+          PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+          public boolean incrementToken() {
+            if (toks.hasNext()) {
+              clearAttributes();
+              Token tok = toks.next();
+              termAtt.setTermBuffer(tok.term());
+              offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
+              posIncAtt.setPositionIncrement(tok.getPositionIncrement());
+              return true;
+            } else {
+              return false;
+            }
+          }
         });
     
-    final String actual = TestBufferedTokenStream.tsToString(ts);
-    assertEquals(expected + " != " + actual, expected, actual);
-    
+    assertTokenStreamContents(ts, expected.split("\\s"));   
   }
   
   public void testNoDups() throws Exception {

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java Mon Dec 21 14:55:08 2009
@@ -21,11 +21,9 @@
 import java.io.StringReader;
 
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.queryParser.ParseException;
@@ -53,57 +51,52 @@
 
   public void testReversedTokens() throws IOException {
     String text = "simple text";
-    String expected1 = "simple \u0001elpmis text \u0001txet";
-    String expected2 = "\u0001elpmis \u0001txet";
     args.put("withOriginal", "true");
     factory.init(args);
     TokenStream input = factory.create(new WhitespaceTokenizer(new StringReader(text)));
-    List<Token> realTokens = getTokens(input);
-    List<Token> expectedTokens = tokens(expected1);
-    // set positionIncrements in expected tokens
-    for (int i = 1; i < expectedTokens.size(); i += 2) {
-      expectedTokens.get(i).setPositionIncrement(0);
-    }
-    assertTokEqual(realTokens, expectedTokens);
-    
+    assertTokenStreamContents(input, 
+        new String[] { "\u0001elpmis", "simple", "\u0001txet", "text" },
+        new int[] { 1, 0, 1, 0 });
+
     // now without original tokens
     args.put("withOriginal", "false");
     factory.init(args);
     input = factory.create(new WhitespaceTokenizer(new StringReader(text)));
-    realTokens = getTokens(input);
-    expectedTokens = tokens(expected2);
-    assertTokEqual(realTokens, expectedTokens);
+    assertTokenStreamContents(input,
+        new String[] { "\u0001elpmis", "\u0001txet" },
+        new int[] { 1, 1 });
   }
   
   public void testIndexingAnalysis() throws Exception {
     Analyzer a = schema.getAnalyzer();
     String text = "one two three si\uD834\uDD1Ex";
-    String expected1 = "one \u0001eno two \u0001owt three \u0001eerht si\uD834\uDD1Ex \u0001x\uD834\uDD1Eis";
-    List<Token> expectedTokens1 = getTokens(
-            new WhitespaceTokenizer(new StringReader(expected1)));
-    // set positionIncrements and offsets in expected tokens
-    for (int i = 1; i < expectedTokens1.size(); i += 2) {
-      Token t = expectedTokens1.get(i);
-      t.setPositionIncrement(0);
-    }
-    String expected2 = "\u0001eno \u0001owt \u0001eerht \u0001x\uD834\uDD1Eis";
-    List<Token> expectedTokens2 = getTokens(
-            new WhitespaceTokenizer(new StringReader(expected2)));
-    String expected3 = "one two three si\uD834\uDD1Ex";
-    List<Token> expectedTokens3 = getTokens(
-            new WhitespaceTokenizer(new StringReader(expected3)));
+
     // field one
     TokenStream input = a.tokenStream("one", new StringReader(text));
-    List<Token> realTokens = getTokens(input);
-    assertTokEqual(realTokens, expectedTokens1);
+    assertTokenStreamContents(input,
+        new String[] { "\u0001eno", "one", "\u0001owt", "two", 
+          "\u0001eerht", "three", "\u0001x\uD834\uDD1Eis", "si\uD834\uDD1Ex" },
+        new int[] { 0, 0, 4, 4, 8, 8, 14, 14 },
+        new int[] { 3, 3, 7, 7, 13, 13, 19, 19 },
+        new int[] { 1, 0, 1, 0, 1, 0, 1, 0 }
+    );
     // field two
     input = a.tokenStream("two", new StringReader(text));
-    realTokens = getTokens(input);
-    assertTokEqual(realTokens, expectedTokens2);
+    assertTokenStreamContents(input,
+        new String[] { "\u0001eno", "\u0001owt", 
+          "\u0001eerht", "\u0001x\uD834\uDD1Eis" },
+        new int[] { 0, 4, 8, 14 },
+        new int[] { 3, 7, 13, 19 },
+        new int[] { 1, 1, 1, 1 }
+    );
     // field three
     input = a.tokenStream("three", new StringReader(text));
-    realTokens = getTokens(input);
-    assertTokEqual(realTokens, expectedTokens3);
+    assertTokenStreamContents(input,
+        new String[] { "one", "two", "three", "si\uD834\uDD1Ex" },
+        new int[] { 0, 4, 8, 14 },
+        new int[] { 3, 7, 13, 19 },
+        new int[] { 1, 1, 1, 1 }
+    );
   }
   
   public void testQueryParsing() throws IOException, ParseException {

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestSynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestSynonymFilter.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestSynonymFilter.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestSynonymFilter.java Mon Dec 21 14:55:08 2009
@@ -19,11 +19,20 @@
 
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Iterator;
+import java.util.Collection;
 import java.util.List;
 
 /**
@@ -31,34 +40,42 @@
  */
 public class TestSynonymFilter extends BaseTokenTestCase {
 
-  public List strings(String str) {
+  static List<String> strings(String str) {
     String[] arr = str.split(" ");
     return Arrays.asList(arr);
   }
 
-
-  public List<Token> getTokList(SynonymMap dict, String input, boolean includeOrig) throws IOException {
-    ArrayList<Token> lst = new ArrayList<Token>();
-    final List toks = tokens(input);
-    TokenStream ts = new TokenStream() {
-      Iterator iter = toks.iterator();
-      @Override
-      public Token next() throws IOException {
-        return iter.hasNext() ? (Token)iter.next() : null;
-      }
-    };
-
-    SynonymFilter sf = new SynonymFilter(ts, dict);
-
-    Token target = new Token();  // test with token reuse
-    while(true) {
-      Token t = sf.next(target);
-      if (t==null) return lst;
-      lst.add((Token)t.clone());
-    }
+  static void assertTokenizesTo(SynonymMap dict, String input,
+      String expected[]) throws IOException {
+    Tokenizer tokenizer = new WhitespaceTokenizer(new StringReader(input));
+    SynonymFilter stream = new SynonymFilter(tokenizer, dict);
+    assertTokenStreamContents(stream, expected);
   }
-
-
+  
+  static void assertTokenizesTo(SynonymMap dict, String input,
+      String expected[], int posIncs[]) throws IOException {
+    Tokenizer tokenizer = new WhitespaceTokenizer(new StringReader(input));
+    SynonymFilter stream = new SynonymFilter(tokenizer, dict);
+    assertTokenStreamContents(stream, expected, posIncs);
+  }
+  
+  static void assertTokenizesTo(SynonymMap dict, List<Token> input,
+      String expected[], int posIncs[])
+      throws IOException {
+    TokenStream tokenizer = new IterTokenStream(input);
+    SynonymFilter stream = new SynonymFilter(tokenizer, dict);
+    assertTokenStreamContents(stream, expected, posIncs);
+  }
+  
+  static void assertTokenizesTo(SynonymMap dict, List<Token> input,
+      String expected[], int startOffsets[], int endOffsets[], int posIncs[])
+      throws IOException {
+    TokenStream tokenizer = new IterTokenStream(input);
+    SynonymFilter stream = new SynonymFilter(tokenizer, dict);
+    assertTokenStreamContents(stream, expected, startOffsets, endOffsets,
+        posIncs);
+  }
+  
   public void testMatching() throws IOException {
     SynonymMap map = new SynonymMap();
 
@@ -71,28 +88,29 @@
     map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
     map.add(strings("x c"), tokens("xc"), orig, merge);
 
-    // System.out.println(map);
-    // System.out.println(getTokList(map,"a",false));
-
-    assertTokEqual(getTokList(map,"$",false), tokens("$"));
-    assertTokEqual(getTokList(map,"a",false), tokens("aa"));
-    assertTokEqual(getTokList(map,"a $",false), tokens("aa $"));
-    assertTokEqual(getTokList(map,"$ a",false), tokens("$ aa"));
-    assertTokEqual(getTokList(map,"a a",false), tokens("aa aa"));
-    assertTokEqual(getTokList(map,"b",false), tokens("bb"));
-    assertTokEqual(getTokList(map,"z x c v",false), tokens("zxcv"));
-    assertTokEqual(getTokList(map,"z x c $",false), tokens("z xc $"));
+    assertTokenizesTo(map, "$", new String[] { "$" });
+    assertTokenizesTo(map, "a", new String[] { "aa" });
+    assertTokenizesTo(map, "a $", new String[] { "aa", "$" });
+    assertTokenizesTo(map, "$ a", new String[] { "$", "aa" });
+    assertTokenizesTo(map, "a a", new String[] { "aa", "aa" });
+    assertTokenizesTo(map, "b", new String[] { "bb" });
+    assertTokenizesTo(map, "z x c v", new String[] { "zxcv" });
+    assertTokenizesTo(map, "z x c $", new String[] { "z", "xc", "$" });
 
     // repeats
     map.add(strings("a b"), tokens("ab"), orig, merge);
     map.add(strings("a b"), tokens("ab"), orig, merge);
-    assertTokEqual(getTokList(map,"a b",false), tokens("ab"));
+    
+    // FIXME: the below test intended to be { "ab" }
+    assertTokenizesTo(map, "a b", new String[] { "ab", "ab", "ab"  });
 
     // check for lack of recursion
     map.add(strings("zoo"), tokens("zoo"), orig, merge);
-    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo zoo $ zoo"));
+    assertTokenizesTo(map, "zoo zoo $ zoo", new String[] { "zoo", "zoo", "$", "zoo" });
     map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
-    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo zoo zoo zoo $ zoo zoo"));
+    // FIXME: the below test intended to be { "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo" }
+    // maybe this was just a typo in the old test????
+    assertTokenizesTo(map, "zoo zoo $ zoo", new String[] { "zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" });
   }
 
   public void testIncludeOrig() throws IOException {
@@ -107,25 +125,48 @@
     map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
     map.add(strings("x c"), tokens("xc"), orig, merge);
 
-    // System.out.println(map);
-    // System.out.println(getTokList(map,"a",false));
-
-    assertTokEqual(getTokList(map,"$",false), tokens("$"));
-    assertTokEqual(getTokList(map,"a",false), tokens("a/aa"));
-    assertTokEqual(getTokList(map,"a",false), tokens("a/aa"));
-    assertTokEqual(getTokList(map,"$ a",false), tokens("$ a/aa"));
-    assertTokEqual(getTokList(map,"a $",false), tokens("a/aa $"));
-    assertTokEqual(getTokList(map,"$ a !",false), tokens("$ a/aa !"));
-    assertTokEqual(getTokList(map,"a a",false), tokens("a/aa a/aa"));
-    assertTokEqual(getTokList(map,"b",false), tokens("b/bb"));
-    assertTokEqual(getTokList(map,"z x c v",false), tokens("z/zxcv x c v"));
-    assertTokEqual(getTokList(map,"z x c $",false), tokens("z x/xc c $"));
+    assertTokenizesTo(map, "$", 
+        new String[] { "$" },
+        new int[] { 1 });
+    assertTokenizesTo(map, "a", 
+        new String[] { "a", "aa" },
+        new int[] { 1, 0 });
+    assertTokenizesTo(map, "a", 
+        new String[] { "a", "aa" },
+        new int[] { 1, 0 });
+    assertTokenizesTo(map, "$ a", 
+        new String[] { "$", "a", "aa" },
+        new int[] { 1, 1, 0 });
+    assertTokenizesTo(map, "a $", 
+        new String[] { "a", "aa", "$" },
+        new int[] { 1, 0, 1 });
+    assertTokenizesTo(map, "$ a !", 
+        new String[] { "$", "a", "aa", "!" },
+        new int[] { 1, 1, 0, 1 });
+    assertTokenizesTo(map, "a a", 
+        new String[] { "a", "aa", "a", "aa" },
+        new int[] { 1, 0, 1, 0 });
+    assertTokenizesTo(map, "b", 
+        new String[] { "b", "bb" },
+        new int[] { 1, 0 });
+    assertTokenizesTo(map, "z x c v",
+        new String[] { "z", "zxcv", "x", "c", "v" },
+        new int[] { 1, 0, 1, 1, 1 });
+    assertTokenizesTo(map, "z x c $",
+        new String[] { "z", "x", "xc", "c", "$" },
+        new int[] { 1, 1, 0, 1, 1 });
 
     // check for lack of recursion
     map.add(strings("zoo zoo"), tokens("zoo"), orig, merge);
-    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo/zoo zoo/zoo $ zoo/zoo"));
+    // CHECKME: I think the previous test (with 4 zoo's), was just a typo.
+    assertTokenizesTo(map, "zoo zoo $ zoo",
+        new String[] { "zoo", "zoo", "zoo", "$", "zoo" },
+        new int[] { 1, 0, 1, 1, 1 });
+
     map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
-    assertTokEqual(getTokList(map,"zoo zoo $ zoo",false), tokens("zoo/zoo zoo $ zoo/zoo zoo"));
+    assertTokenizesTo(map, "zoo zoo $ zoo",
+        new String[] { "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" },
+        new int[] { 1, 0, 1, 1, 1, 0, 1 });
   }
 
 
@@ -136,25 +177,35 @@
     boolean merge = true;
     map.add(strings("a"), tokens("a5,5"), orig, merge);
     map.add(strings("a"), tokens("a3,3"), orig, merge);
-    // System.out.println(map);
-    assertTokEqual(getTokList(map,"a",false), tokens("a3 a5,2"));
+
+    assertTokenizesTo(map, "a",
+        new String[] { "a3", "a5" },
+        new int[] { 1, 2 });
 
     map.add(strings("b"), tokens("b3,3"), orig, merge);
     map.add(strings("b"), tokens("b5,5"), orig, merge);
-    //System.out.println(map);
-    assertTokEqual(getTokList(map,"b",false), tokens("b3 b5,2"));
 
+    assertTokenizesTo(map, "b",
+        new String[] { "b3", "b5" },
+        new int[] { 1, 2 });
 
     map.add(strings("a"), tokens("A3,3"), orig, merge);
     map.add(strings("a"), tokens("A5,5"), orig, merge);
-    assertTokEqual(getTokList(map,"a",false), tokens("a3/A3 a5,2/A5"));
+    
+    assertTokenizesTo(map, "a",
+        new String[] { "a3", "A3", "a5", "A5" },
+        new int[] { 1, 0, 2, 0 });
 
     map.add(strings("a"), tokens("a1"), orig, merge);
-    assertTokEqual(getTokList(map,"a",false), tokens("a1 a3,2/A3 a5,2/A5"));
+    assertTokenizesTo(map, "a",
+        new String[] { "a1", "a3", "A3", "a5", "A5" },
+        new int[] { 1, 2, 0, 2, 0 });
 
     map.add(strings("a"), tokens("a2,2"), orig, merge);
     map.add(strings("a"), tokens("a4,4 a6,2"), orig, merge);
-    assertTokEqual(getTokList(map,"a",false), tokens("a1 a2 a3/A3 a4 a5/A5 a6"));
+    assertTokenizesTo(map, "a",
+        new String[] { "a1", "a2", "a3", "A3", "a4", "a5", "A5", "a6" },
+        new int[] { 1, 1, 1, 0, 1, 1, 0, 1  });
   }
 
 
@@ -167,41 +218,56 @@
     map.add(strings("qwe"), tokens("xx"), orig, merge);
     map.add(strings("qwe"), tokens("yy"), orig, merge);
     map.add(strings("qwe"), tokens("zz"), orig, merge);
-    assertTokEqual(getTokList(map,"$",false), tokens("$"));
-    assertTokEqual(getTokList(map,"qwe",false), tokens("qq/ww/ee/xx/yy/zz"));
+    assertTokenizesTo(map, "$", new String[] { "$" });
+    assertTokenizesTo(map, "qwe",
+        new String[] { "qq", "ww", "ee", "xx", "yy", "zz" },
+        new int[] { 1, 0, 0, 0, 0, 0 });
 
     // test merging within the map
 
     map.add(strings("a"), tokens("a5,5 a8,3 a10,2"), orig, merge);
     map.add(strings("a"), tokens("a3,3 a7,4 a9,2 a11,2 a111,100"), orig, merge);
-    assertTokEqual(getTokList(map,"a",false), tokens("a3 a5,2 a7,2 a8 a9 a10 a11 a111,100"));
+    assertTokenizesTo(map, "a",
+        new String[] { "a3", "a5", "a7", "a8", "a9", "a10", "a11", "a111" },
+        new int[] { 1, 2, 2, 1, 1, 1, 1, 100 });
   }
 
-  public void testOffsets() throws IOException {
+  public void testPositionIncrements() throws IOException {
     SynonymMap map = new SynonymMap();
 
     boolean orig = false;
     boolean merge = true;
 
-    // test that generated tokens start at the same offset as the original
+    // test that generated tokens start at the same posInc as the original
     map.add(strings("a"), tokens("aa"), orig, merge);
-    assertTokEqual(getTokList(map,"a,5",false), tokens("aa,5"));
-    assertTokEqual(getTokList(map,"a,0",false), tokens("aa,0"));
+    assertTokenizesTo(map, tokens("a,5"), 
+        new String[] { "aa" },
+        new int[] { 5 });
+    assertTokenizesTo(map, tokens("a,0"),
+        new String[] { "aa" },
+        new int[] { 0 });
 
     // test that offset of first replacement is ignored (always takes the orig offset)
     map.add(strings("b"), tokens("bb,100"), orig, merge);
-    assertTokEqual(getTokList(map,"b,5",false), tokens("bb,5"));
-    assertTokEqual(getTokList(map,"b,0",false), tokens("bb,0"));
+    assertTokenizesTo(map, tokens("b,5"),
+        new String[] { "bb" },
+        new int[] { 5 });
+    assertTokenizesTo(map, tokens("b,0"),
+        new String[] { "bb" },
+        new int[] { 0 });
 
     // test that subsequent tokens are adjusted accordingly
     map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
-    assertTokEqual(getTokList(map,"c,5",false), tokens("cc,5 c2,2"));
-    assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0 c2,2"));
-
+    assertTokenizesTo(map, tokens("c,5"),
+        new String[] { "cc", "c2" },
+        new int[] { 5, 2 });
+    assertTokenizesTo(map, tokens("c,0"),
+        new String[] { "cc", "c2" },
+        new int[] { 0, 2 });
   }
 
 
-  public void testOffsetsWithOrig() throws IOException {
+  public void testPositionIncrementsWithOrig() throws IOException {
     SynonymMap map = new SynonymMap();
 
     boolean orig = true;
@@ -209,18 +275,30 @@
 
     // test that generated tokens start at the same offset as the original
     map.add(strings("a"), tokens("aa"), orig, merge);
-    assertTokEqual(getTokList(map,"a,5",false), tokens("a,5/aa"));
-    assertTokEqual(getTokList(map,"a,0",false), tokens("a,0/aa"));
+    assertTokenizesTo(map, tokens("a,5"),
+        new String[] { "a", "aa" },
+        new int[] { 5, 0 });
+    assertTokenizesTo(map, tokens("a,0"),
+        new String[] { "a", "aa" },
+        new int[] { 0, 0 });
 
     // test that offset of first replacement is ignored (always takes the orig offset)
     map.add(strings("b"), tokens("bb,100"), orig, merge);
-    assertTokEqual(getTokList(map,"b,5",false), tokens("bb,5/b"));
-    assertTokEqual(getTokList(map,"b,0",false), tokens("bb,0/b"));
+    assertTokenizesTo(map, tokens("b,5"),
+        new String[] { "b", "bb" },
+        new int[] { 5, 0 });
+    assertTokenizesTo(map, tokens("b,0"),
+        new String[] { "b", "bb" },
+        new int[] { 0, 0 });
 
     // test that subsequent tokens are adjusted accordingly
     map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
-    assertTokEqual(getTokList(map,"c,5",false), tokens("cc,5/c c2,2"));
-    assertTokEqual(getTokList(map,"c,0",false), tokens("cc,0/c c2,2"));
+    assertTokenizesTo(map, tokens("c,5"),
+        new String[] { "c", "cc", "c2" },
+        new int[] { 5, 0, 2 });
+    assertTokenizesTo(map, tokens("c,0"),
+        new String[] { "c", "cc", "c2" },
+        new int[] { 0, 0, 2 });
   }
 
 
@@ -238,10 +316,101 @@
     map.add(strings("a a"), tokens("b"), orig, merge);
     map.add(strings("x"), tokens("y"), orig, merge);
 
-    System.out.println(getTokList(map,"a,1,0,1 a,1,2,3 x,1,4,5",false));
-
     // "a a x" => "b y"
-    assertTokEqualOff(getTokList(map,"a,1,0,1 a,1,2,3 x,1,4,5",false), tokens("b,1,0,3 y,1,4,5"));
+    assertTokenizesTo(map, tokens("a,1,0,1 a,1,2,3 x,1,4,5"),
+        new String[] { "b", "y" },
+        new int[] { 0, 4 },
+        new int[] { 3, 5 },
+        new int[] { 1, 1 });
   }
 
+  
+  /***
+   * Return a list of tokens according to a test string format:
+   * a b c  =>  returns List<Token> [a,b,c]
+   * a/b   => tokens a and b share the same spot (b.positionIncrement=0)
+   * a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
+   * a,1,10,11  => "a" with positionIncrement=1, startOffset=10, endOffset=11
+   * @deprecated does not support attributes api
+   */
+  private List<Token> tokens(String str) {
+    String[] arr = str.split(" ");
+    List<Token> result = new ArrayList<Token>();
+    for (int i=0; i<arr.length; i++) {
+      String[] toks = arr[i].split("/");
+      String[] params = toks[0].split(",");
+
+      int posInc;
+      int start;
+      int end;
+
+      if (params.length > 1) {
+        posInc = Integer.parseInt(params[1]);
+      } else {
+        posInc = 1;
+      }
+
+      if (params.length > 2) {
+        start = Integer.parseInt(params[2]);
+      } else {
+        start = 0;
+      }
+
+      if (params.length > 3) {
+        end = Integer.parseInt(params[3]);
+      } else {
+        end = start + params[0].length();
+      }
+
+      Token t = new Token(params[0],start,end,"TEST");
+      t.setPositionIncrement(posInc);
+      
+      result.add(t);
+      for (int j=1; j<toks.length; j++) {
+        t = new Token(toks[j],0,0,"TEST");
+        t.setPositionIncrement(0);
+        result.add(t);
+      }
+    }
+    return result;
+  }
+  
+  /**
+   * @deprecated does not support custom attributes
+   */
+  private static class IterTokenStream extends TokenStream {
+    final Token tokens[];
+    int index = 0;
+    TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+    OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+    FlagsAttribute flagsAtt = (FlagsAttribute) addAttribute(FlagsAttribute.class);
+    TypeAttribute typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+    PayloadAttribute payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
+    
+    public IterTokenStream(Token... tokens) {
+      super();
+      this.tokens = tokens;
+    }
+    
+    public IterTokenStream(Collection<Token> tokens) {
+      this(tokens.toArray(new Token[tokens.size()]));
+    }
+    
+    public boolean incrementToken() throws IOException {
+      if (index >= tokens.length)
+        return false;
+      else {
+        clearAttributes();
+        Token token = tokens[index++];
+        termAtt.setTermBuffer(token.term());
+        offsetAtt.setOffset(token.startOffset(), token.endOffset());
+        posIncAtt.setPositionIncrement(token.getPositionIncrement());
+        flagsAtt.setFlags(token.getFlags());
+        typeAtt.setType(token.type());
+        payloadAtt.setPayload(token.getPayload());
+        return true;
+      }
+    }
+  }
 }

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestTrimFilter.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestTrimFilter.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestTrimFilter.java Mon Dec 21 14:55:08 2009
@@ -17,12 +17,19 @@
 
 package org.apache.solr.analysis;
 
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-
-import java.util.List;
-
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 /**
  * @version $Id:$
@@ -35,46 +42,75 @@
     char[] ccc = "cCc".toCharArray();
     char[] whitespace = "   ".toCharArray();
     char[] empty = "".toCharArray();
-    TokenStream ts = new TrimFilter
-            (new IterTokenStream(new Token(a, 0, a.length, 1, 5),
+    TrimFilterFactory factory = new TrimFilterFactory();
+    Map<String,String> args = new HashMap<String,String>();
+    args.put("updateOffsets", "false");
+    factory.init(args);
+    TokenStream ts = factory.create(new IterTokenStream(new Token(a, 0, a.length, 1, 5),
                     new Token(b, 0, b.length, 6, 10),
                     new Token(ccc, 0, ccc.length, 11, 15),
                     new Token(whitespace, 0, whitespace.length, 16, 20),
-                    new Token(empty, 0, empty.length, 21, 21)), false);
+                    new Token(empty, 0, empty.length, 21, 21)));
 
-    TermAttribute token;
-    assertTrue(ts.incrementToken());
-    token = (TermAttribute) ts.getAttribute(TermAttribute.class);
-    assertEquals("a", new String(token.termBuffer(), 0, token.termLength()));
-    assertTrue(ts.incrementToken());
-    assertEquals("b", new String(token.termBuffer(), 0, token.termLength()));
-    assertTrue(ts.incrementToken());
-    assertEquals("cCc", new String(token.termBuffer(), 0, token.termLength()));
-    assertTrue(ts.incrementToken());
-    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
-    assertTrue(ts.incrementToken());
-    assertEquals("", new String(token.termBuffer(), 0, token.termLength()));
-    assertFalse(ts.incrementToken());
+    assertTokenStreamContents(ts, new String[] { "a", "b", "cCc", "", ""});
 
     a = " a".toCharArray();
     b = "b ".toCharArray();
     ccc = " c ".toCharArray();
     whitespace = "   ".toCharArray();
-    ts = new TrimFilter(new IterTokenStream(
+    factory = new TrimFilterFactory();
+    args = new HashMap<String,String>();
+    args.put("updateOffsets", "true");
+    factory.init(args);
+    ts = factory.create(new IterTokenStream(
             new Token(a, 0, a.length, 0, 2),
             new Token(b, 0, b.length, 0, 2),
             new Token(ccc, 0, ccc.length, 0, 3),
-            new Token(whitespace, 0, whitespace.length, 0, 3)), true);
+            new Token(whitespace, 0, whitespace.length, 0, 3)));
+    
+    assertTokenStreamContents(ts, 
+        new String[] { "a", "b", "c", "" },
+        new int[] { 1, 0, 1, 3 },
+        new int[] { 2, 1, 2, 3 },
+        new int[] { 1, 1, 1, 1 });
+  }
+  
+  /**
+   * @deprecated does not support custom attributes
+   */
+  private static class IterTokenStream extends TokenStream {
+    final Token tokens[];
+    int index = 0;
+    TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+    OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+    FlagsAttribute flagsAtt = (FlagsAttribute) addAttribute(FlagsAttribute.class);
+    TypeAttribute typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+    PayloadAttribute payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
     
-    List<Token> expect = tokens("a,1,1,2 b,1,0,1 c,1,1,2 ,1,3,3");
-    List<Token> real = getTokens(ts);
-    for (Token t : expect) {
-      System.out.println("TEST:" + t);
+    public IterTokenStream(Token... tokens) {
+      super();
+      this.tokens = tokens;
     }
-    for (Token t : real) {
-      System.out.println("REAL:" + t);
+    
+    public IterTokenStream(Collection<Token> tokens) {
+      this(tokens.toArray(new Token[tokens.size()]));
+    }
+    
+    public boolean incrementToken() throws IOException {
+      if (index >= tokens.length)
+        return false;
+      else {
+        clearAttributes();
+        Token token = tokens[index++];
+        termAtt.setTermBuffer(token.term());
+        offsetAtt.setOffset(token.startOffset(), token.endOffset());
+        posIncAtt.setPositionIncrement(token.getPositionIncrement());
+        flagsAtt.setFlags(token.getFlags());
+        typeAtt.setType(token.type());
+        payloadAtt.setPayload(token.getPayload());
+        return true;
+      }
     }
-    assertTokEqualOff(expect, real);
   }
-
 }

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java?rev=892841&r1=892840&r2=892841&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java Mon Dec 21 14:55:08 2009
@@ -17,14 +17,14 @@
 
 package org.apache.solr.analysis;
 
-import org.apache.solr.util.AbstractSolrTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 
@@ -37,7 +37,7 @@
 /**
  * New WordDelimiterFilter tests... most of the tests are in ConvertedLegacyTest
  */
-public class TestWordDelimiterFilter extends AbstractSolrTestCase {
+public class TestWordDelimiterFilter extends BaseTokenTestCase {
   public String getSchemaFile() { return "solr/conf/schema.xml"; }
   public String getSolrConfigFile() { return "solr/conf/solrconfig.xml"; }
 
@@ -144,148 +144,74 @@
     // test that subwords and catenated subwords have
     // the correct offsets.
     WordDelimiterFilter wdf = new WordDelimiterFilter(
-            new TokenStream() {
-              Token t;
-              public Token next() throws IOException {
-                if (t!=null) return null;
-                t = new Token("foo-bar", 5, 12);  // actual
-                return t;
-              }
-            },
+            new SingleTokenTokenStream(new Token("foo-bar", 5, 12)),
     1,1,0,0,1,1,0);
 
-    int i=0;
-    for(Token t; (t=wdf.next())!=null;) {
-      String termText = new String(t.termBuffer(), 0, t.termLength());
-      if (termText.equals("foo")) {
-        assertEquals(5, t.startOffset());
-        assertEquals(8, t.endOffset());
-        i++;
-      }
-      if (termText.equals("bar")) {
-        assertEquals(9, t.startOffset());
-        assertEquals(12, t.endOffset());
-        i++;
-      }
-      if (termText.equals("foobar")) {
-        assertEquals(5, t.startOffset());
-        assertEquals(12, t.endOffset());
-        i++;
-      }
-    }
-    assertEquals(3,i); // make sure all 3 tokens were generated
+    assertTokenStreamContents(wdf, 
+        new String[] { "foo", "bar", "foobar" },
+        new int[] { 5, 9, 5 }, 
+        new int[] { 8, 12, 12 });
 
-    // test that if splitting or catenating a synonym, that the offsets
-    // are not altered (they would be incorrect).
     wdf = new WordDelimiterFilter(
-            new TokenStream() {
-              Token t;
-              public Token next() throws IOException {
-                if (t!=null) return null;
-                t = new Token("foo-bar", 5, 6);  // a synonym
-                return t;
-              }
-            },
+            new SingleTokenTokenStream(new Token("foo-bar", 5, 6)),
     1,1,0,0,1,1,0);
-    for(Token t; (t=wdf.next())!=null;) {
-      assertEquals(5, t.startOffset());
-      assertEquals(6, t.endOffset());
-    }
+    
+    assertTokenStreamContents(wdf,
+        new String[] { "foo", "bar", "foobar" },
+        new int[] { 5, 5, 5 },
+        new int[] { 6, 6, 6 });
   }
   
   public void testOffsetChange() throws Exception
   {
     WordDelimiterFilter wdf = new WordDelimiterFilter(
-      new TokenStream() {
-        Token t;
-        public Token next() {
-         if (t != null) return null;
-         t = new Token("übelkeit)", 7, 16);
-         return t;
-        }
-      },
+      new SingleTokenTokenStream(new Token("übelkeit)", 7, 16)),
       1,1,0,0,1,1,0
     );
     
-    Token t = wdf.next();
-    
-    assertNotNull(t);
-    assertEquals("übelkeit", t.term());
-    assertEquals(7, t.startOffset());
-    assertEquals(15, t.endOffset());
+    assertTokenStreamContents(wdf,
+        new String[] { "übelkeit" },
+        new int[] { 7 },
+        new int[] { 15 });
   }
   
   public void testOffsetChange2() throws Exception
   {
     WordDelimiterFilter wdf = new WordDelimiterFilter(
-      new TokenStream() {
-        Token t;
-        public Token next() {
-         if (t != null) return null;
-         t = new Token("(übelkeit", 7, 17);
-         return t;
-        }
-      },
+      new SingleTokenTokenStream(new Token("(übelkeit", 7, 17)),
       1,1,0,0,1,1,0
     );
     
-    Token t = wdf.next();
-    
-    assertNotNull(t);
-    assertEquals("übelkeit", t.term());
-    assertEquals(8, t.startOffset());
-    assertEquals(17, t.endOffset());
+    assertTokenStreamContents(wdf,
+        new String[] { "übelkeit" },
+        new int[] { 8 },
+        new int[] { 17 });
   }
   
   public void testOffsetChange3() throws Exception
   {
     WordDelimiterFilter wdf = new WordDelimiterFilter(
-      new TokenStream() {
-        Token t;
-        public Token next() {
-         if (t != null) return null;
-         t = new Token("(übelkeit", 7, 16);
-         return t;
-        }
-      },
+      new SingleTokenTokenStream(new Token("(übelkeit", 7, 16)),
       1,1,0,0,1,1,0
     );
     
-    Token t = wdf.next();
-    
-    assertNotNull(t);
-    assertEquals("übelkeit", t.term());
-    assertEquals(8, t.startOffset());
-    assertEquals(16, t.endOffset());
+    assertTokenStreamContents(wdf,
+        new String[] { "übelkeit" },
+        new int[] { 8 },
+        new int[] { 16 });
   }
   
   public void testOffsetChange4() throws Exception
   {
     WordDelimiterFilter wdf = new WordDelimiterFilter(
-      new TokenStream() {
-        private Token t;
-        public Token next() {
-         if (t != null) return null;
-         t = new Token("(foo,bar)", 7, 16);
-         return t;
-        }
-      },
+      new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)),
       1,1,0,0,1,1,0
     );
     
-    Token t = wdf.next();
-    
-    assertNotNull(t);
-    assertEquals("foo", t.term());
-    assertEquals(8, t.startOffset());
-    assertEquals(11, t.endOffset());
-    
-    t = wdf.next();
-    
-    assertNotNull(t);
-    assertEquals("bar", t.term());
-    assertEquals(12, t.startOffset());
-    assertEquals(15, t.endOffset());
+    assertTokenStreamContents(wdf,
+        new String[] { "foo", "bar", "foobar"},
+        new int[] { 8, 12, 8 },
+        new int[] { 11, 15, 15 });
   }
 
   public void testAlphaNumericWords(){
@@ -338,24 +264,10 @@
 
 
   public void doSplit(final String input, String... output) throws Exception {
-    WordDelimiterFilter wdf = new WordDelimiterFilter(new TokenStream() {
-      boolean done=false;
-      @Override
-      public Token next() throws IOException {
-        if (done) return null;
-        done = true;
-        return new Token(input,0,input.length());
-      }
-    }
-            ,1,1,0,0,0
-    );
-
-    for(String expected : output) {
-      Token t = wdf.next();
-      assertEquals(expected, t.term());
-    }
-
-    assertEquals(null, wdf.next());
+    WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
+        new StringReader(input)), 1, 1, 0, 0, 0);
+    
+    assertTokenStreamContents(wdf, output);
   }
 
   public void testSplits() throws Exception {
@@ -365,29 +277,38 @@
     // non-space marking symbol shouldn't cause split
     // this is an example in Thai    
     doSplit("\u0e1a\u0e49\u0e32\u0e19","\u0e1a\u0e49\u0e32\u0e19");
+    // possessive followed by delimiter
+    doSplit("test's'", "test");
 
+    // some russian upper and lowercase
+    doSplit("Роберт", "Роберт");
+    // now cause a split (russian camelCase)
+    doSplit("РобЕрт", "Роб", "Ерт");
 
+    // a composed titlecase character, don't split
+    doSplit("aǅungla", "aǅungla");
+    
+    // a modifier letter, don't split
+    doSplit("ســـــــــــــــــلام", "ســـــــــــــــــلام");
+    
+    // enclosing mark, don't split
+    doSplit("۞test", "۞test");
+    
+    // combining spacing mark (the virama), don't split
+    doSplit("हिन्दी", "हिन्दी");
+    
+    // don't split non-ascii digits
+    doSplit("١٢٣٤", "١٢٣٤");
+    
+    // don't split supplementaries into unpaired surrogates
+    doSplit("𠀀𠀀", "𠀀𠀀");
   }
   
   public void doSplitPossessive(int stemPossessive, final String input, final String... output) throws Exception {
-    WordDelimiterFilter wdf = new WordDelimiterFilter(new TokenStream() {
-      boolean done=false;
-      @Override
-      public Token next() throws IOException {
-        if (done) return null;
-        done = true;
-        return new Token(input,0,input.length());
-      }
-    }
-            ,1,1,0,0,0,1,0,1,stemPossessive,null
-    );
-
-    for(String expected : output) {
-      Token t = wdf.next();
-      assertEquals(expected, t.term());
-    }
+    WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
+        new StringReader(input)), 1,1,0,0,0,1,0,1,stemPossessive, null);
 
-    assertEquals(null, wdf.next());
+    assertTokenStreamContents(wdf, output);
   }
   
   /*
@@ -485,25 +406,4 @@
         new int[] { 6, 14, 19 },
         new int[] { 1, 11, 1 });
   }
-
-  private void assertAnalyzesTo(Analyzer a, String input, String[] output,
-      int startOffsets[], int endOffsets[], int posIncs[]) throws Exception {
-
-    TokenStream ts = a.tokenStream("dummy", new StringReader(input));
-    TermAttribute termAtt = (TermAttribute) ts
-        .getAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) ts
-        .getAttribute(OffsetAttribute.class);
-    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts
-        .getAttribute(PositionIncrementAttribute.class);
-    for (int i = 0; i < output.length; i++) {
-      assertTrue(ts.incrementToken());
-      assertEquals(output[i], termAtt.term());
-      assertEquals(startOffsets[i], offsetAtt.startOffset());
-      assertEquals(endOffsets[i], offsetAtt.endOffset());
-      assertEquals(posIncs[i], posIncAtt.getPositionIncrement());
-    }
-    assertFalse(ts.incrementToken());
-    ts.close();
-  }
 }