You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/21 04:35:01 UTC

svn commit: r1303269 - in /lucene/dev/branches/branch_3x/lucene: contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/ contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/ contrib/analyzers/common/src/test/org/apache/luc...

Author: rmuir
Date: Wed Mar 21 03:35:01 2012
New Revision: 1303269

URL: http://svn.apache.org/viewvc?rev=1303269&view=rev
Log:
LUCENE-3894: add large docs tests for more tokenizers

Modified:
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
    lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java
    lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java Wed Mar 21 03:35:01 2012
@@ -513,6 +513,25 @@ public class HTMLStripCharFilterTest ext
     checkRandomData(random, analyzer, numRounds);
   }
   
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer analyzer = new ReusableAnalyzerBase() {
+
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }
+
+      @Override
+      protected Reader initReader(Reader reader) {
+        return new HTMLStripCharFilter(CharReader.get(reader));
+      }
+    };
+    
+    int numRounds = RANDOM_MULTIPLIER * 200;
+    checkRandomData(random, analyzer, numRounds, 8192);
+  }
+  
   public void testServerSideIncludes() throws Exception {
     String test = "one<img src=\"image.png\"\n"
         + " alt =  \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}'  -->\"\n\n"

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java Wed Mar 21 03:35:01 2012
@@ -272,4 +272,9 @@ public class TestCJKAnalyzer extends Bas
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java Wed Mar 21 03:35:01 2012
@@ -209,4 +209,16 @@ public class TestPathHierarchyTokenizer 
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new PathHierarchyTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }    
+    };
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java Wed Mar 21 03:35:01 2012
@@ -170,4 +170,16 @@ public class TestReversePathHierarchyTok
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new ReversePathHierarchyTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }    
+    };
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Wed Mar 21 03:35:01 2012
@@ -1145,4 +1145,16 @@ public class ShingleFilterTest extends B
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
+      }
+    };
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java Wed Mar 21 03:35:01 2012
@@ -429,6 +429,31 @@ public class TestSynonymMapFilter extend
     }
   }
   
+  /** simple random test like testRandom2, but for large docs
+   */
+  public void testRandomHuge() throws Exception {
+    final int numIters = atLeast(10);
+    for (int i = 0; i < numIters; i++) {
+      b = new SynonymMap.Builder(random.nextBoolean());
+      final int numEntries = atLeast(10);
+      for (int j = 0; j < numEntries; j++) {
+        add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
+      }
+      final SynonymMap map = b.build();
+      final boolean ignoreCase = random.nextBoolean();
+      
+      final Analyzer analyzer = new ReusableAnalyzerBase() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+          return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
+        }
+      };
+
+      checkRandomData(random, analyzer, 100*RANDOM_MULTIPLIER, 8192);
+    }
+  }
+  
   // LUCENE-3375
   public void testVanishingTerms() throws Exception {
     String testFile = 

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Wed Mar 21 03:35:01 2012
@@ -168,6 +168,11 @@ public class TestThaiAnalyzer extends Ba
     checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
   
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
+  
   // LUCENE-3044
   public void testAttributeReuse() throws Exception {
     ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java Wed Mar 21 03:35:01 2012
@@ -186,4 +186,17 @@ public class WikipediaTokenizerTest exte
     };
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    Analyzer a = new ReusableAnalyzerBase() {
+
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new WikipediaTokenizer(reader);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      } 
+    };
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java Wed Mar 21 03:35:01 2012
@@ -228,6 +228,13 @@ public class TestAnalyzers extends BaseT
     checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
     checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   } 
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+    checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+    checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  } 
 }
 
 final class PayloadSetter extends TokenFilter {

Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java Wed Mar 21 03:35:01 2012
@@ -306,4 +306,9 @@ public class TestClassicAnalyzer extends
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java Wed Mar 21 03:35:01 2012
@@ -239,4 +239,9 @@ public class TestStandardAnalyzer extend
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java Wed Mar 21 03:35:01 2012
@@ -469,4 +469,9 @@ public class TestUAX29URLEmailTokenizer 
   public void testRandomStrings() throws Exception {
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }