You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/21 04:14:46 UTC
svn commit: r1303262 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/
lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/
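lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/
lucene/test-framework/src/java/org/apache/lucene/analysis/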
Author: rmuir
Date: Wed Mar 21 03:14:45 2012
New Revision: 1303262
URL: http://svn.apache.org/viewvc?rev=1303262&view=rev
Log:
LUCENE-3894: for tokenizers, add some tests for larger documents
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestExtendedMode.java Wed Mar 21 03:14:45 2012
@@ -60,4 +60,14 @@ public class TestExtendedMode extends Ba
}
}
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiAnalyzer.java Wed Mar 21 03:14:45 2012
@@ -127,6 +127,14 @@ public class TestKuromojiAnalyzer extend
KuromojiAnalyzer.getDefaultStopTags());
checkRandomData(random, a, atLeast(10000));
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ final Analyzer a = new KuromojiAnalyzer(TEST_VERSION_CURRENT, null, Mode.SEARCH,
+ KuromojiAnalyzer.getDefaultStopSet(),
+ KuromojiAnalyzer.getDefaultStopTags());
+ checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+ }
// Copied from TestKuromojiTokenizer, to make sure passing
// user dict to analyzer works:
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java Wed Mar 21 03:14:45 2012
@@ -42,6 +42,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
+import org.junit.Ignore;
public class TestKuromojiTokenizer extends BaseTokenStreamTestCase {
@@ -191,6 +192,13 @@ public class TestKuromojiTokenizer exten
checkRandomData(random, analyzerNoPunct, 10000*RANDOM_MULTIPLIER);
}
+ /** blast some random large strings through the analyzer */
+ @Ignore("FIXME: see LUCENE-3897")
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
+ checkRandomData(random, analyzerNoPunct, 200*RANDOM_MULTIPLIER, 8192);
+ }
+
public void testLargeDocReliability() throws Exception {
for (int i = 0; i < 100; i++) {
String s = _TestUtil.randomUnicodeString(random, 10000);
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Wed Mar 21 03:14:45 2012
@@ -224,4 +224,9 @@ public class TestSmartChineseAnalyzer ex
public void testRandomStrings() throws Exception {
checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java Wed Mar 21 03:14:45 2012
@@ -237,4 +237,9 @@ public class TestICUTokenizer extends Ba
public void testRandomStrings() throws Exception {
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1303262&r1=1303261&r2=1303262&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Wed Mar 21 03:14:45 2012
@@ -295,7 +295,12 @@ public abstract class BaseTokenStreamTes
/** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
- checkRandomData(random, a, iterations, false);
+ checkRandomData(random, a, iterations, 20, false);
+ }
+
+ /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
+ public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
+ checkRandomData(random, a, iterations, maxWordLength, false);
}
/**