Posted to commits@lucene.apache.org by sa...@apache.org on 2011/05/22 23:45:45 UTC

svn commit: r1126234 [21/28] - in /lucene/dev/branches/solr2452: ./ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/db/bdb-je/ dev-tools/idea/lucene/contri...
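
Note: two changes recur across the analysis-module test files in this part of the commit. First, WhitespaceTokenizer/WhitespaceAnalyzer (and LetterTokenizer) are swapped for the test framework's MockTokenizer/MockAnalyzer, which tokenize equivalently for these tests but additionally assert that consumers follow the TokenStream workflow (reset, incrementToken, end, close); tests that consume streams by hand therefore gain the previously missing reset() calls. Second, many analyzer tests gain a testRandomStrings method that pushes random input through the analyzer via checkRandomData.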

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java Sun May 22 21:45:19 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.ngram
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@@ -32,7 +33,7 @@ public class EdgeNGramTokenFilterTest ex
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+    input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
   }
 
   public void testInvalidInput() throws Exception {
@@ -91,7 +92,7 @@ public class EdgeNGramTokenFilterTest ex
   }
   
   public void testSmallTokenInStream() throws Exception {
-    input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
+    input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
     assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
   }
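
Note: the substitution in the hunk above repeats throughout this commit. MockTokenizer(reader, MockTokenizer.WHITESPACE, false) is a drop-in for WhitespaceTokenizer (the final argument disables lowercasing), but unlike the production tokenizer it asserts at runtime that callers honor the TokenStream consumer contract. A minimal sketch of that contract, for orientation only and not part of the patch (run inside any method that can throw IOException):

    import java.io.StringReader;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    TokenStream ts = new MockTokenizer(new StringReader("abcde"),
                                       MockTokenizer.WHITESPACE, false); // false = keep case
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                       // must precede the first incrementToken()
    while (ts.incrementToken()) {     // one token per iteration
      System.out.println(term.toString());
    }
    ts.end();                         // signals end of stream
    ts.close();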

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Sun May 22 21:45:19 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.ngram
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@@ -32,7 +33,7 @@ public class NGramTokenFilterTest extend
     @Override
     public void setUp() throws Exception {
         super.setUp();
-        input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+        input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
     }
 
     public void testInvalidInput() throws Exception {
@@ -80,7 +81,7 @@ public class NGramTokenFilterTest extend
     }
     
     public void testSmallTokenInStream() throws Exception {
-      input = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abc de fgh"));
+      input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
       NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
       assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
     }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Sun May 22 21:45:19 2011
@@ -185,4 +185,9 @@ public class TestDutchStemmer extends Ba
     checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected); 
   }
   
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new DutchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
+  
 }
\ No newline at end of file
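
Note: the testRandomStrings additions in this and the following files all share one shape: BaseTokenStreamTestCase.checkRandomData pushes many randomly generated strings through the analyzer to shake out analysis bugs. As a sketch inside any BaseTokenStreamTestCase subclass (FooAnalyzer is a placeholder, not a class in this patch):

    /** blast some random strings through the analyzer */
    public void testRandomStrings() throws Exception {
      // random and RANDOM_MULTIPLIER come from the test framework;
      // 10000*RANDOM_MULTIPLIER sets the iteration count.
      checkRandomData(random, new FooAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
    }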

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestNorwegianAnalyzer exten
     checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
     checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new NorwegianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java Sun May 22 21:45:19 2011
@@ -127,4 +127,70 @@ public class TestPathHierarchyTokenizer 
         new int[]{1, 0, 0, 0},
         path.length());
   }
+
+  public void testBasicSkip() throws Exception {
+    String path = "/a/b/c";
+    PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+    assertTokenStreamContents(t,
+        new String[]{"/b", "/b/c"},
+        new int[]{2, 2},
+        new int[]{4, 6},
+        new int[]{1, 0},
+        path.length());
+  }
+
+  public void testEndOfDelimiterSkip() throws Exception {
+    String path = "/a/b/c/";
+    PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+    assertTokenStreamContents(t,
+        new String[]{"/b", "/b/c", "/b/c/"},
+        new int[]{2, 2, 2},
+        new int[]{4, 6, 7},
+        new int[]{1, 0, 0},
+        path.length());
+  }
+
+  public void testStartOfCharSkip() throws Exception {
+    String path = "a/b/c";
+    PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+    assertTokenStreamContents(t,
+        new String[]{"/b", "/b/c"},
+        new int[]{1, 1},
+        new int[]{3, 5},
+        new int[]{1, 0},
+        path.length());
+  }
+
+  public void testStartOfCharEndOfDelimiterSkip() throws Exception {
+    String path = "a/b/c/";
+    PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+    assertTokenStreamContents(t,
+        new String[]{"/b", "/b/c", "/b/c/"},
+        new int[]{1, 1, 1},
+        new int[]{3, 5, 6},
+        new int[]{1, 0, 0},
+        path.length());
+  }
+
+  public void testOnlyDelimiterSkip() throws Exception {
+    String path = "/";
+    PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+    assertTokenStreamContents(t,
+        new String[]{},
+        new int[]{},
+        new int[]{},
+        new int[]{},
+        path.length());
+  }
+
+  public void testOnlyDelimitersSkip() throws Exception {
+    String path = "//";
+    PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+    assertTokenStreamContents(t,
+        new String[]{"/"},
+        new int[]{1},
+        new int[]{2},
+        new int[]{1},
+        path.length());
+  }
 }
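
Note: the six new tests above pin down the tokenizer's skip argument. With PathHierarchyTokenizer(reader, 1), the first path component is dropped from every emitted token while start/end offsets continue to point into the original input, which is why "/a/b/c" yields "/b" at offsets 2..4 and "/b/c" at offsets 2..6. Condensed from the test data (not itself in the patch):

    // skip = 1 drops the leading component; offsets stay absolute
    Tokenizer t = new PathHierarchyTokenizer(new StringReader("/a/b/c"), 1);
    // emits "/b" (start 2, end 4, posInc 1) then "/b/c" (start 2, end 6, posInc 0)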

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java Sun May 22 21:45:19 2011
@@ -24,8 +24,8 @@ import java.util.regex.Pattern;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharReader;
 import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Tests {@link PatternReplaceCharFilter}
@@ -39,7 +39,7 @@ public class TestPatternReplaceCharFilte
     final String BLOCK = "this is test.";
     CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1$2$3",
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "this", "is", "test." },
         new int[] { 0, 5, 8 },
@@ -52,8 +52,8 @@ public class TestPatternReplaceCharFilte
     final String BLOCK = "aa bb cc";
     CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "",
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
-    assertFalse(ts.incrementToken());
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts, new String[] {});
   }
   
   // 012345678
@@ -63,7 +63,7 @@ public class TestPatternReplaceCharFilte
     final String BLOCK = "aa bb cc";
     CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2#$3",
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "aa#bb#cc" },
         new int[] { 0 },
@@ -78,7 +78,7 @@ public class TestPatternReplaceCharFilte
     final String BLOCK = "aa bb cc dd";
     CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1##$2###$3",
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "aa##bb###cc", "dd" },
         new int[] { 0, 9 },
@@ -92,7 +92,7 @@ public class TestPatternReplaceCharFilte
     final String BLOCK = " a  a";
     CharStream cs = new PatternReplaceCharFilter( pattern("a"), "aa",
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "aa", "aa" },
         new int[] { 1, 4 },
@@ -107,7 +107,7 @@ public class TestPatternReplaceCharFilte
     final String BLOCK = "aa  bb   cc dd";
     CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2",
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "aa#bb", "dd" },
         new int[] { 0, 12 },
@@ -122,7 +122,7 @@ public class TestPatternReplaceCharFilte
     final String BLOCK = "  aa bb cc --- aa bb aa   bb   cc";
     CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1  $2  $3",
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "aa", "bb", "cc", "---", "aa", "bb", "aa", "bb", "cc" },
         new int[] { 2, 6, 9, 11, 15, 18, 21, 25, 29 },
@@ -137,7 +137,7 @@ public class TestPatternReplaceCharFilte
     final String BLOCK = "  aa bb cc --- aa bb aa. bb aa   bb cc";
     CharStream cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)"), "$1##$2", ".",
           CharReader.get( new StringReader( BLOCK ) ) );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "aa##bb", "cc", "---", "aa##bb", "aa.", "bb", "aa##bb", "cc" },
         new int[] { 2, 8, 11, 15, 21, 25, 28, 36 },
@@ -154,7 +154,7 @@ public class TestPatternReplaceCharFilte
         CharReader.get( new StringReader( BLOCK ) ) );
     cs = new PatternReplaceCharFilter( pattern("bb"), "b", ".", cs );
     cs = new PatternReplaceCharFilter( pattern("ccc"), "c", ".", cs );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
         new String[] { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" },
         new int[] { 1, 3, 6, 8, 12, 14, 18, 21, 23, 25, 29, 33 },
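
Note: one hunk above also changes how an empty stream is asserted. The helper form drives the full reset/incrementToken/end/close workflow that MockTokenizer enforces, where the raw form skipped it:

    // before: raw consumption, never calls reset()/end()
    assertFalse(ts.incrementToken());
    // after: helper runs the complete consumer workflow and checks for zero tokens
    assertTokenStreamContents(ts, new String[] {});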

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceFilter.java Sun May 22 21:45:19 2011
@@ -18,8 +18,8 @@
 package org.apache.lucene.analysis.pattern;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 import java.io.StringReader;
 import java.util.regex.Pattern;
@@ -32,7 +32,7 @@ public class TestPatternReplaceFilter ex
   public void testReplaceAll() throws Exception {
     String input = "aabfooaabfooabfoob ab caaaaaaaaab";
     TokenStream ts = new PatternReplaceFilter
-            (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+            (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                     Pattern.compile("a*b"),
                     "-", true);
     assertTokenStreamContents(ts, 
@@ -42,7 +42,7 @@ public class TestPatternReplaceFilter ex
   public void testReplaceFirst() throws Exception {
     String input = "aabfooaabfooabfoob ab caaaaaaaaab";
     TokenStream ts = new PatternReplaceFilter
-            (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+            (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                     Pattern.compile("a*b"),
                     "-", false);
     assertTokenStreamContents(ts, 
@@ -52,7 +52,7 @@ public class TestPatternReplaceFilter ex
   public void testStripFirst() throws Exception {
     String input = "aabfooaabfooabfoob ab caaaaaaaaab";
     TokenStream ts = new PatternReplaceFilter
-            (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+            (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                     Pattern.compile("a*b"),
                     null, false);
     assertTokenStreamContents(ts,
@@ -62,7 +62,7 @@ public class TestPatternReplaceFilter ex
   public void testStripAll() throws Exception {
     String input = "aabfooaabfooabfoob ab caaaaaaaaab";
     TokenStream ts = new PatternReplaceFilter
-            (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+            (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                     Pattern.compile("a*b"),
                     null, true);
     assertTokenStreamContents(ts,
@@ -72,7 +72,7 @@ public class TestPatternReplaceFilter ex
   public void testReplaceAllWithBackRef() throws Exception {
     String input = "aabfooaabfooabfoob ab caaaaaaaaab";
     TokenStream ts = new PatternReplaceFilter
-            (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)),
+            (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                     Pattern.compile("(a*)b"),
                     "$1\\$", true);
     assertTokenStreamContents(ts,

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java Sun May 22 21:45:19 2011
@@ -16,8 +16,8 @@ package org.apache.lucene.analysis.paylo
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.index.Payload;
@@ -30,7 +30,7 @@ public class DelimitedPayloadTokenFilter
   public void testPayloads() throws Exception {
     String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
     DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
-      (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), 
+      (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), 
        DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
     CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
     PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
@@ -51,7 +51,7 @@ public class DelimitedPayloadTokenFilter
 
     String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
     DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
-      (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), 
+      (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), 
        DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
     assertTermEquals("The", filter, null);
     assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
@@ -69,7 +69,7 @@ public class DelimitedPayloadTokenFilter
 
   public void testFloatEncoding() throws Exception {
     String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
-    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder());
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new FloatEncoder());
     CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
     PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
     assertTermEquals("The", filter, termAtt, payAtt, null);
@@ -87,7 +87,7 @@ public class DelimitedPayloadTokenFilter
 
   public void testIntEncoding() throws Exception {
     String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
-    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new IntegerEncoder());
     CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
     PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
     assertTermEquals("The", filter, termAtt, payAtt, null);
@@ -106,6 +106,7 @@ public class DelimitedPayloadTokenFilter
   void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
+    stream.reset();
     assertTrue(stream.incrementToken());
     assertEquals(expected, termAtt.toString());
     Payload payload = payloadAtt.getPayload();
@@ -122,6 +123,7 @@ public class DelimitedPayloadTokenFilter
 
 
   void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
+    stream.reset();
     assertTrue(stream.incrementToken());
     assertEquals(expected, termAtt.toString());
     Payload payload = payAtt.getPayload();
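
Note: both assertTermEquals helpers consume their stream directly via incrementToken(), so each now calls stream.reset() first; with MockTokenizer upstream, a missing reset() fails loudly instead of passing by accident. The same one-line fix appears in the payload, shingle, and tee-sink tests below.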

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java Sun May 22 21:45:19 2011
@@ -17,9 +17,9 @@ package org.apache.lucene.analysis.paylo
  */
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
@@ -32,11 +32,12 @@ public class NumericPayloadTokenFilterTe
   public void test() throws IOException {
     String test = "The quick red fox jumped over the lazy brown dogs";
 
-    NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D");
+    NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)), 3, "D");
     boolean seenDogs = false;
     CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
     TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
     PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
+    nptf.reset();
     while (nptf.incrementToken()) {
       if (termAtt.toString().equals("dogs")) {
         seenDogs = true;

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java Sun May 22 21:45:19 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.analysis.paylo
  */
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.index.Payload;
@@ -30,11 +30,11 @@ public class TokenOffsetPayloadTokenFilt
   public void test() throws IOException {
     String test = "The quick red fox jumped over the lazy brown dogs";
 
-    TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
+    TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
     int count = 0;
     PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
     OffsetAttribute offsetAtt = nptf.getAttribute(OffsetAttribute.class);
-    
+    nptf.reset();
     while (nptf.incrementToken()) {
       Payload pay = payloadAtt.getPayload();
       assertTrue("pay is null and it shouldn't be", pay != null);

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java Sun May 22 21:45:19 2011
@@ -17,9 +17,9 @@ package org.apache.lucene.analysis.paylo
  */
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
@@ -32,12 +32,12 @@ public class TypeAsPayloadTokenFilterTes
   public void test() throws IOException {
     String test = "The quick red fox jumped over the lazy brown dogs";
 
-    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
+    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
     int count = 0;
     CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
     TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
     PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
-    
+    nptf.reset();
     while (nptf.incrementToken()) {
       assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
       assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestPortugueseAnalyzer exte
     checkOneTermReuse(a, "quilométricas", "quilométricas");
     checkOneTermReuse(a, "quilométricos", "quilométr");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new PortugueseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java Sun May 22 21:45:19 2011
@@ -92,4 +92,9 @@ public class TestPortugueseLightStemFilt
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), "ptlight.txt");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java Sun May 22 21:45:19 2011
@@ -66,4 +66,9 @@ public class TestPortugueseMinimalStemFi
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), "ptminimal.txt");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java Sun May 22 21:45:19 2011
@@ -66,4 +66,9 @@ public class TestPortugueseStemFilter ex
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java Sun May 22 21:45:19 2011
@@ -22,6 +22,8 @@ import java.io.StringReader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LetterTokenizer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
@@ -50,7 +52,7 @@ public class QueryAutoStopWordAnalyzerTe
   public void setUp() throws Exception {
     super.setUp();
     dir = new RAMDirectory();
-    appAnalyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+    appAnalyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, appAnalyzer));
     int numDocs = 200;
     for (int i = 0; i < numDocs; i++) {
@@ -159,9 +161,9 @@ public class QueryAutoStopWordAnalyzerTe
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
       if (++invocationCount % 2 == 0)
-        return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       else
-        return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+        return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
     }
   }
   
@@ -175,7 +177,7 @@ public class QueryAutoStopWordAnalyzerTe
   }
   
   public void testTokenStream() throws Exception {
-    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
     a.addStopWords(reader, 10);
     TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
     assertTokenStreamContents(ts, new String[] { "this" });
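
Note: at the analyzer level the same substitution uses MockAnalyzer; as the hunks above show, MockTokenizer.WHITESPACE stands in for the whitespace case and MockTokenizer.SIMPLE for the LetterTokenizer branch. A sketch, not part of the patch:

    Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); // ~WhitespaceAnalyzer
    TokenStream ts = a.tokenStream("field", new StringReader("this boring"));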

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Sun May 22 21:45:19 2011
@@ -19,22 +19,22 @@ package org.apache.lucene.analysis.rever
 
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 
 public class TestReverseStringFilter extends BaseTokenStreamTestCase {
   public void testFilter() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, 
-        new StringReader("Do have a nice day"));     // 1-4 length string
+    TokenStream stream = new MockTokenizer(new StringReader("Do have a nice day"),
+        MockTokenizer.WHITESPACE, false);     // 1-4 length string
     ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream);
     assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" });
   }
   
   public void testFilterWithMark() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
-        "Do have a nice day")); // 1-4 length string
+    TokenStream stream = new MockTokenizer(new StringReader("Do have a nice day"),
+        MockTokenizer.WHITESPACE, false); // 1-4 length string
     ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001');
     assertTokenStreamContents(filter, 
         new String[] { "\u0001oD", "\u0001evah", "\u0001a", "\u0001ecin", "\u0001yad" });

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestRomanianAnalyzer extend
     checkOneTermReuse(a, "absenţa", "absenţa");
     checkOneTermReuse(a, "absenţi", "absenţ");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new RomanianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Sun May 22 21:45:19 2011
@@ -64,4 +64,9 @@ public class TestRussianAnalyzer extends
           new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
      
     }
+    
+    /** blast some random strings through the analyzer */
+    public void testRandomStrings() throws Exception {
+      checkRandomData(random, new RussianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+    }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestRussianLightStemFilter 
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new RussianLightStemFilter(source));
     }
   };
@@ -45,4 +45,9 @@ public class TestRussianLightStemFilter 
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), "rulight.txt");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java Sun May 22 21:45:19 2011
@@ -22,10 +22,9 @@ import java.io.StringReader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
@@ -106,7 +105,7 @@ public class ShingleAnalyzerWrapperTest 
    */
   public void testShingleAnalyzerWrapperQueryParsing() throws Exception {
     ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
-                                     (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+                                     (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
                                  "test sentence");
     int[] ranks = new int[] { 1, 2, 0 };
     compareRanks(hits, ranks);
@@ -117,7 +116,7 @@ public class ShingleAnalyzerWrapperTest 
    */
   public void testShingleAnalyzerWrapperPhraseQueryParsingFails() throws Exception {
     ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
-                                     (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+                                     (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
                                  "\"this sentence\"");
     int[] ranks = new int[] { 0 };
     compareRanks(hits, ranks);
@@ -128,7 +127,7 @@ public class ShingleAnalyzerWrapperTest 
    */
   public void testShingleAnalyzerWrapperPhraseQueryParsing() throws Exception {
     ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
-                                     (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+                                     (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
                                  "\"test sentence\"");
     int[] ranks = new int[] { 1 };
     compareRanks(hits, ranks);
@@ -139,7 +138,7 @@ public class ShingleAnalyzerWrapperTest 
    */
   public void testShingleAnalyzerWrapperRequiredQueryParsing() throws Exception {
     ScoreDoc[] hits = queryParsingTest(new ShingleAnalyzerWrapper
-                                     (new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2),
+                                     (new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2),
                                  "+test +sentence");
     int[] ranks = new int[] { 1, 2 };
     compareRanks(hits, ranks);
@@ -149,7 +148,7 @@ public class ShingleAnalyzerWrapperTest 
    * This shows how to construct a phrase query containing shingles.
    */
   public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
-    Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
+    Analyzer analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
     searcher = setUpSearcher(analyzer);
 
     PhraseQuery q = new PhraseQuery();
@@ -161,6 +160,7 @@ public class ShingleAnalyzerWrapperTest 
     PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     
+    ts.reset();
     while (ts.incrementToken()) {
       j += posIncrAtt.getPositionIncrement();
       String termText = termAtt.toString();
@@ -178,7 +178,7 @@ public class ShingleAnalyzerWrapperTest 
    * in the right order and adjacent to each other.
    */
   public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
-    Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
+    Analyzer analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
     searcher = setUpSearcher(analyzer);
 
     BooleanQuery q = new BooleanQuery();
@@ -188,6 +188,8 @@ public class ShingleAnalyzerWrapperTest 
     
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     
+    ts.reset();
+
     while (ts.incrementToken()) {
       String termText =  termAtt.toString();
       q.add(new TermQuery(new Term("content", termText)),
@@ -200,7 +202,7 @@ public class ShingleAnalyzerWrapperTest 
   }
   
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
+    Analyzer a = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 2);
     assertAnalyzesToReuse(a, "please divide into shingles",
         new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
         new int[] { 0, 0, 7, 7, 14, 14, 19 },
@@ -222,9 +224,9 @@ public class ShingleAnalyzerWrapperTest 
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
       if (++invocationCount % 2 == 0)
-        return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       else
-        return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+        return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
     }
   }
   
@@ -249,7 +251,7 @@ public class ShingleAnalyzerWrapperTest 
 
   public void testNonDefaultMinShingleSize() throws Exception {
     ShingleAnalyzerWrapper analyzer 
-      = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 4);
+      = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 4);
     assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
                           new String[] { "please",   "please divide this",   "please divide this sentence", 
                                          "divide",   "divide this sentence", "divide this sentence into", 
@@ -273,7 +275,7 @@ public class ShingleAnalyzerWrapperTest 
   
   public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
     ShingleAnalyzerWrapper analyzer
-      = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 3, 3);
+      = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 3);
     assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
                           new String[] { "please",   "please divide this", 
                                          "divide",   "divide this sentence", 
@@ -297,7 +299,7 @@ public class ShingleAnalyzerWrapperTest 
 
   public void testNoTokenSeparator() throws Exception {
     ShingleAnalyzerWrapper analyzer 
-      = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+      = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
     analyzer.setTokenSeparator("");
     assertAnalyzesToReuse(analyzer, "please divide into shingles",
                           new String[] { "please", "pleasedivide", 
@@ -319,7 +321,7 @@ public class ShingleAnalyzerWrapperTest 
 
   public void testNullTokenSeparator() throws Exception {
     ShingleAnalyzerWrapper analyzer 
-      = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+      = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
     analyzer.setTokenSeparator(null);
     assertAnalyzesToReuse(analyzer, "please divide into shingles",
                           new String[] { "please", "pleasedivide", 
@@ -340,7 +342,7 @@ public class ShingleAnalyzerWrapperTest 
   }
   public void testAltTokenSeparator() throws Exception {
     ShingleAnalyzerWrapper analyzer 
-      = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+      = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
     analyzer.setTokenSeparator("<SEP>");
     assertAnalyzesToReuse(analyzer, "please divide into shingles",
                           new String[] { "please", "please<SEP>divide", 
@@ -362,7 +364,7 @@ public class ShingleAnalyzerWrapperTest 
   
   public void testOutputUnigramsIfNoShinglesSingleToken() throws Exception {
     ShingleAnalyzerWrapper analyzer
-      = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+      = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
     analyzer.setOutputUnigrams(false);
     analyzer.setOutputUnigramsIfNoShingles(true);
     assertAnalyzesToReuse(analyzer, "please",

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java Sun May 22 21:45:19 2011
@@ -22,14 +22,14 @@ import java.text.SimpleDateFormat;
 import java.util.Locale;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
 
 public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
 
   public void test() throws IOException {
     DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
     String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006  The dogs finally reacted on 7/12/2006";
-    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
+    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
     TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);
     int count = 0;
     

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java Sun May 22 21:45:19 2011
@@ -84,7 +84,7 @@ public class TestTeeSinkTokenFilter exte
   // with BaseTokenStreamTestCase now...
   public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
     Directory dir = newDirectory();
-    Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
     Document doc = new Document();
     TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
@@ -108,7 +108,7 @@ public class TestTeeSinkTokenFilter exte
   }
   
   public void testGeneral() throws IOException {
-    final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer1.toString())));
+    final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false));
     final TokenStream sink1 = source.newSinkTokenStream();
     final TokenStream sink2 = source.newSinkTokenStream(theFilter);
     
@@ -122,16 +122,17 @@ public class TestTeeSinkTokenFilter exte
   }
 
   public void testMultipleSources() throws Exception {
-    final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer1.toString())));
+    final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false));
     final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
     final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
+    tee1.reset();
     final TokenStream source1 = new CachingTokenFilter(tee1);
     
     tee1.addAttribute(CheckClearAttributesAttribute.class);
     dogDetector.addAttribute(CheckClearAttributesAttribute.class);
     theDetector.addAttribute(CheckClearAttributesAttribute.class);
 
-    final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer2.toString())));
+    final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer2.toString()), MockTokenizer.WHITESPACE, false));
     tee2.addSinkTokenStream(dogDetector);
     tee2.addSinkTokenStream(theDetector);
     final TokenStream source2 = tee2;

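The tee1.reset() added here is required because MockTokenizer enforces the consumer workflow: reset() must be called before the first incrementToken(), and CachingTokenFilter fills its cache by consuming its input directly, without resetting it first. The contract being enforced, as a minimal sketch:

    TokenStream ts = new MockTokenizer(new StringReader("the quick brown fox"),
        MockTokenizer.WHITESPACE, false);
    ts.reset();                     // mandatory before the first incrementToken()
    while (ts.incrementToken()) {
      // MockTokenizer trips an assertion if reset() was skipped
    }
    ts.end();
    ts.close();
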
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java Sun May 22 21:45:19 2011
@@ -20,14 +20,14 @@ import java.io.IOException;
 import java.io.StringReader;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
 
 public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
 
   public void test() throws IOException {
     TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
     String test = "The quick red fox jumped over the lazy brown dogs";
-    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)));
+    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
     TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
     
     int count = 0;

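TokenRangeSinkFilter(2, 4) accepts tokens by position, lower bound inclusive and upper bound exclusive, so for the sentence above the sink should end up holding just tokens 2 and 3 ("red" and "fox"). Consuming it after draining the tee (illustrative, reusing the names from this test):

    tee.reset();
    while (tee.incrementToken()) {}  // fills rangeToks as a side effect
    tee.end();
    tee.close();
    rangeToks.reset();
    int kept = 0;
    while (rangeToks.incrementToken()) {
      kept++;                        // expect 2: "red" and "fox"
    }
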
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java Sun May 22 21:45:19 2011
@@ -20,9 +20,9 @@ import java.io.IOException;
 import java.io.StringReader;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
@@ -32,7 +32,7 @@ public class TokenTypeSinkTokenizerTest 
     TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
     String test = "The quick red fox jumped over the lazy brown dogs";
 
-    TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
+    TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
     TeeSinkTokenFilter.SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
     
     boolean seenDogs = false;

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestSwedishAnalyzer extends
     checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
     checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new SwedishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

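The testRandomStrings methods added throughout this commit delegate to checkRandomData from BaseTokenStreamTestCase, which pushes random text through the analyzer and checks basic TokenStream invariants (offsets in bounds, non-negative position increments, consistent reuse). Much simplified, each call conceptually does the following (sketch, not the real implementation):

    Analyzer analyzer = new SwedishAnalyzer(TEST_VERSION_CURRENT);
    for (int i = 0; i < 10000 * RANDOM_MULTIPLIER; i++) {
      String text = _TestUtil.randomUnicodeString(random);
      TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader(text));
      ts.reset();
      while (ts.incrementToken()) {
        // per-token sanity checks happen here
      }
      ts.end();
      ts.close();
    }
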
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java Sun May 22 21:45:19 2011
@@ -22,8 +22,8 @@ import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestSwedishLightStemFilter 
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new SwedishLightStemFilter(source));
     }
   };
@@ -45,4 +45,9 @@ public class TestSwedishLightStemFilter 
   public void testVocabulary() throws IOException {
     assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java Sun May 22 21:45:19 2011
@@ -25,6 +25,7 @@ import java.util.Collection;
 import java.util.List;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -43,14 +44,14 @@ public class TestSynonymFilter extends B
 
   static void assertTokenizesTo(SynonymMap dict, String input,
       String expected[]) throws IOException {
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     SynonymFilter stream = new SynonymFilter(tokenizer, dict);
     assertTokenStreamContents(stream, expected);
   }
   
   static void assertTokenizesTo(SynonymMap dict, String input,
       String expected[], int posIncs[]) throws IOException {
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     SynonymFilter stream = new SynonymFilter(tokenizer, dict);
     assertTokenStreamContents(stream, expected, posIncs);
   }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Sun May 22 21:45:19 2011
@@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th;
  * limitations under the License.
  */
 
+import java.io.StringReader;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.util.Version;
 
 /**
@@ -142,5 +146,24 @@ public class TestThaiAnalyzer extends Ba
             analyzer,
             "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
             new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
-    }
+  }
+
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
+    checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
+  
+  // LUCENE-3044
+  public void testAttributeReuse() throws Exception {
+    assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
+    // just consume
+    TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+    // this consumer adds flagsAtt, which this analyzer does not use. 
+    ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    ts.addAttribute(FlagsAttribute.class);
+    assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+  }
 }

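ThaiWordFilter.DBBI_AVAILABLE is false on JREs whose BreakIterator has no Thai dictionary, and assumeTrue turns such runs into skips rather than failures; the LUCENE-3044 test then verifies that a consumer adding an attribute the analyzer never populates (FlagsAttribute) leaves the reused stream's output unchanged. The guard pattern, reusable for any Thai-dependent test (sketch):

    public void testThaiDependentFeature() throws Exception {
      // skipped, not failed, on JREs without the Thai dictionary-based BreakIterator
      assumeTrue("JRE does not support Thai dictionary-based BreakIterator",
          ThaiWordFilter.DBBI_AVAILABLE);
      // body runs only when Thai segmentation is actually available
    }
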
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java Sun May 22 21:45:19 2011
@@ -50,4 +50,9 @@ public class TestTurkishAnalyzer extends
     checkOneTermReuse(a, "ağacı", "ağacı");
     checkOneTermReuse(a, "ağaç", "ağaç");
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, new TurkishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java Sun May 22 21:45:19 2011
@@ -20,8 +20,8 @@ package org.apache.lucene.analysis.tr;
 import java.io.StringReader;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Test the Turkish lowercase filter.
@@ -32,8 +32,8 @@ public class TestTurkishLowerCaseFilter 
    * Test composed forms
    */
   public void testTurkishLowerCaseFilter() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
-        "\u0130STANBUL \u0130ZM\u0130R ISPARTA"));
+    TokenStream stream = new MockTokenizer(new StringReader(
+        "\u0130STANBUL \u0130ZM\u0130R ISPARTA"), MockTokenizer.WHITESPACE, false);
     TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
     assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
         "\u0131sparta",});
@@ -43,8 +43,8 @@ public class TestTurkishLowerCaseFilter 
    * Test decomposed forms
    */
   public void testDecomposed() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
-        "\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"));
+    TokenStream stream = new MockTokenizer(new StringReader(
+        "\u0049\u0307STANBUL \u0049\u0307ZM\u0049\u0307R ISPARTA"), MockTokenizer.WHITESPACE, false);
     TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
     assertTokenStreamContents(filter, new String[] {"istanbul", "izmir",
         "\u0131sparta",});
@@ -56,8 +56,8 @@ public class TestTurkishLowerCaseFilter 
    * to U+0130 + U+0316, and is lowercased the same way.
    */
   public void testDecomposed2() throws Exception {
-    TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
-        "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"));
+    TokenStream stream = new MockTokenizer(new StringReader(
+        "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA"), MockTokenizer.WHITESPACE, false);
     TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
     assertTokenStreamContents(filter, new String[] {"i\u0316stanbul", "izmir",
         "\u0131\u0316sparta",});

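These three tests pin down the Turkish-specific casing: dotted uppercase İ (U+0130) lowercases to plain i, bare I lowercases to dotless ı (U+0131), decomposed I + combining dot above (U+0049 U+0307) folds the same way, and unrelated combining marks (U+0316) pass through. The same mapping on another word pair, as an illustrative check:

    TokenStream stream = new MockTokenizer(new StringReader("D\u0130VAN DIVAN"),
        MockTokenizer.WHITESPACE, false);
    TurkishLowerCaseFilter filter = new TurkishLowerCaseFilter(stream);
    // U+0130 -> i, but bare I -> dotless \u0131
    assertTokenStreamContents(filter, new String[] {"divan", "d\u0131van"});
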
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java Sun May 22 21:45:19 2011
@@ -186,7 +186,7 @@ public abstract class CollationTestBase 
                                    String dkResult) throws Exception {
     RAMDirectory indexStore = new RAMDirectory();
     IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
 
     // document data:
     // the tracer field is used to determine which document was hit
@@ -257,27 +257,6 @@ public abstract class CollationTestBase 
     }
     assertEquals(expectedResult, buff.toString());
   }
-  
-  private String randomString() {
-    // ideally we could do this!
-    // return _TestUtil.randomUnicodeString(random);
-    //
-    // http://bugs.icu-project.org/trac/ticket/8060
-    // http://bugs.icu-project.org/trac/ticket/7732
-    // ...
-    // 
-    // as a workaround, just test the BMP for now (and avoid 0xFFFF etc)
-    int length = _TestUtil.nextInt(random, 0, 10);
-    char chars[] = new char[length];
-    for (int i = 0; i < length; i++) {
-      if (random.nextBoolean()) {
-        chars[i] = (char) _TestUtil.nextInt(random, 0, 0xD7FF);
-      } else {
-        chars[i] = (char) _TestUtil.nextInt(random, 0xE000, 0xFFFD);
-      }
-    }
-    return new String(chars, 0, length);
-  }
 
   public void assertThreadSafe(final Analyzer analyzer) throws Exception {
     int numTestPoints = 100;
@@ -289,7 +268,7 @@ public abstract class CollationTestBase 
     // and ensure they are the same as the ones we produced in serial fashion.
 
     for (int i = 0; i < numTestPoints; i++) {
-      String term = randomString();
+      String term = _TestUtil.randomSimpleString(random);
       TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       BytesRef bytes = termAtt.getBytesRef();

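_TestUtil.randomSimpleString sidesteps the ICU tickets documented in the deleted helper by drawing terms from plain lowercase ASCII only; a rough conceptual equivalent (simplified sketch, not the actual implementation):

    int length = _TestUtil.nextInt(random, 0, 10);
    char[] chars = new char[length];
    for (int i = 0; i < length; i++) {
      chars[i] = (char) ('a' + random.nextInt(26));  // 'a'..'z' only
    }
    String term = new String(chars, 0, length);
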
Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java Sun May 22 21:45:19 2011
@@ -87,8 +87,7 @@ public class TestCollationKeyAnalyzer ex
   public void testThreadSafe() throws Exception {
     int iters = 20 * RANDOM_MULTIPLIER;
     for (int i = 0; i < iters; i++) {
-      Locale locale = randomLocale(random);
-      Collator collator = Collator.getInstance(locale);
+      Collator collator = Collator.getInstance(Locale.GERMAN);
       collator.setStrength(Collator.PRIMARY);
       assertThreadSafe(new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
     }

Modified: lucene/dev/branches/solr2452/modules/analysis/icu/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/icu/build.xml?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/icu/build.xml (original)
+++ lucene/dev/branches/solr2452/modules/analysis/icu/build.xml Sun May 22 21:45:19 2011
@@ -137,4 +137,20 @@ are part of the ICU4C package. See http:
     <m2-deploy-with-pom-template pom.xml="lib/lucene-icu4j-pom.xml.template"
                                  jar.file="lib/icu4j-4_6.jar" />
   </target>
+
+  <target name="javadocs" depends="compile-core">
+    <sequential>
+      <mkdir dir="${javadoc.dir}/contrib-${name}"/>
+      <invoke-javadoc
+        destdir="${javadoc.dir}/contrib-${name}"
+        title="${Name} ${version} contrib-${name} API">
+        <sources>
+          <link href="../contrib-analyzers-common"/>
+          <link href=""/>
+          <packageset dir="${src.dir}"/>
+        </sources>
+      </invoke-javadoc>
+      <jarify basedir="${javadoc.dir}/contrib-${name}" destfile="${build.dir}/${final.name}-javadoc.jar"/>
+    </sequential>
+  </target>
 </project>

Modified: lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java Sun May 22 21:45:19 2011
@@ -29,15 +29,14 @@ import org.apache.lucene.analysis.core.W
  * Tests ICUFoldingFilter
  */
 public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
+  Analyzer a = new Analyzer() {
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return new ICUFoldingFilter(
+          new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+    }
+  };
   public void testDefaults() throws IOException {
-    Analyzer a = new Analyzer() {
-      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new ICUFoldingFilter(
-            new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
-      }
-    };
-
     // case folding
     assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
 
@@ -76,4 +75,9 @@ public class TestICUFoldingFilter extend
     // handling of decomposed combining-dot-above
     assertAnalyzesTo(a, "eli\u0307f", new String[] { "elif" });
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java Sun May 22 21:45:19 2011
@@ -31,16 +31,15 @@ import com.ibm.icu.text.Normalizer2;
  * Tests the ICUNormalizer2Filter
  */
 public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
+  Analyzer a = new Analyzer() {
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return new ICUNormalizer2Filter(
+          new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+    }
+  };
 
   public void testDefaults() throws IOException {
-    Analyzer a = new Analyzer() {
-      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new ICUNormalizer2Filter(
-            new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
-      }
-    };
-
     // case folding
     assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
 
@@ -75,4 +74,9 @@ public class TestICUNormalizer2Filter ex
     // decompose EAcute into E + combining Acute
     assertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" });
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java Sun May 22 21:45:19 2011
@@ -18,10 +18,15 @@ package org.apache.lucene.analysis.icu;
  */
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 
 import com.ibm.icu.text.Transliterator;
@@ -83,4 +88,17 @@ public class TestICUTransformFilter exte
     TokenStream ts = new ICUTransformFilter(new KeywordTokenizer((new StringReader(input))), transform);
     assertTokenStreamContents(ts, new String[] { expected });
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    final Transliterator transform = Transliterator.getInstance("Any-Latin");
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, transform));
+      }
+    };
+    checkRandomData(random, a, 1000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java Sun May 22 21:45:19 2011
@@ -232,4 +232,9 @@ public class TestICUTokenizer extends Ba
         new String[] { "仮", "名", "遣", "い", "カタカナ" },
         new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java Sun May 22 21:45:19 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.collation;
 
 
 import com.ibm.icu.text.Collator;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.util.BytesRef;
@@ -88,7 +89,7 @@ public class TestICUCollationKeyAnalyzer
   public void testThreadSafe() throws Exception {
     int iters = 20 * RANDOM_MULTIPLIER;
     for (int i = 0; i < iters; i++) {
-      Locale locale = randomLocale(random);
+      Locale locale = Locale.GERMAN;
       Collator collator = Collator.getInstance(locale);
       collator.setStrength(Collator.IDENTICAL);
       assertThreadSafe(new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));

Modified: lucene/dev/branches/solr2452/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java Sun May 22 21:45:19 2011
@@ -75,7 +75,7 @@ class SegGraph {
     List<SegToken> result = new ArrayList<SegToken>();
     int s = -1, count = 0, size = tokenListTable.size();
     List<SegToken> tokenList;
-    short index = 0;
+    int index = 0;
     while (count < size) {
       if (isStartExist(s)) {
         tokenList = tokenListTable.get(s);