You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/21 04:14:46 UTC

svn commit: r1303262 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/ lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/ lucene/contrib/icu/src/t...

Author: rmuir
Date: Wed Mar 21 03:14:45 2012
New Revision: 1303262

URL: http://svn.apache.org/viewvc?rev=1303262&view=rev
Log:
LUCENE-3894: for tokenizers, add some tests for larger documents

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
    lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java Wed Mar 21 03:14:45 2012
@@ -60,4 +60,14 @@ public class TestExtendedMode extends Ba
       }
     }
   }
+  
+  /** blast some random strings through the analyzer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+  }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java Wed Mar 21 03:14:45 2012
@@ -127,6 +127,14 @@ public class TestKuromojiAnalyzer extend
                                             KuromojiAnalyzer.getDefaultStopTags());
     checkRandomData(random, a, atLeast(10000));
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    final Analyzer a = new KuromojiAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
+        KuromojiAnalyzer.getDefaultStopSet(),
+        KuromojiAnalyzer.getDefaultStopTags());
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 
   // Copied from TestKuromojiTokenizer, to make sure passing
   // user dict to analyzer works:

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java Wed Mar 21 03:14:45 2012
@@ -42,6 +42,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
+import org.junit.Ignore;
 
 public class TestKuromojiTokenizer extends BaseTokenStreamTestCase {
 
@@ -191,6 +192,13 @@ public class TestKuromojiTokenizer exten
     checkRandomData(random, analyzerNoPunct, 10000*RANDOM_MULTIPLIER);
   }
   
+  /** blast some random large strings through the analyzer */
+  @Ignore("FIXME: see LUCENE-3897")
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
+    checkRandomData(random, analyzerNoPunct, 200*RANDOM_MULTIPLIER, 8192);
+  }
+  
   public void testLargeDocReliability() throws Exception {
     for (int i = 0; i < 100; i++) {
       String s = _TestUtil.randomUnicodeString(random, 10000);

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Wed Mar 21 03:14:45 2012
@@ -224,4 +224,9 @@ public class TestSmartChineseAnalyzer ex
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java Wed Mar 21 03:14:45 2012
@@ -237,4 +237,9 @@ public class TestICUTokenizer extends Ba
   public void testRandomStrings() throws Exception {
     checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
   }
+  
+  /** blast some random large strings through the analyzer */
+  public void testRandomHugeStrings() throws Exception {
+    checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Wed Mar 21 03:14:45 2012
@@ -295,7 +295,12 @@ public abstract class BaseTokenStreamTes
   
   /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
   public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
-    checkRandomData(random, a, iterations, false);
+    checkRandomData(random, a, iterations, 20, false);
+  }
+  
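+  /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy; like the three-argument overload, but forwards a caller-supplied maxWordLength instead of the default of 20 */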
+  public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
+    checkRandomData(random, a, iterations, maxWordLength, false);
   }
   
   /**