You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/21 04:35:01 UTC
svn commit: r1303269 - in /lucene/dev/branches/branch_3x/lucene:
contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/
contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/
contrib/analyzers/common/src/test/org/apache/luc...
Author: rmuir
Date: Wed Mar 21 03:35:01 2012
New Revision: 1303269
URL: http://svn.apache.org/viewvc?rev=1303269&view=rev
Log:
LUCENE-3894: add large docs tests for more tokenizers
Modified:
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java
lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java
lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java Wed Mar 21 03:35:01 2012
@@ -513,6 +513,25 @@ public class HTMLStripCharFilterTest ext
checkRandomData(random, analyzer, numRounds);
}
+ public void testRandomHugeStrings() throws Exception {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
+
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+
+ @Override
+ protected Reader initReader(Reader reader) {
+ return new HTMLStripCharFilter(CharReader.get(reader));
+ }
+ };
+
+ int numRounds = RANDOM_MULTIPLIER * 200;
+ checkRandomData(random, analyzer, numRounds, 8192);
+ }
+
public void testServerSideIncludes() throws Exception {
String test = "one<img src=\"image.png\"\n"
+ " alt = \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}' -->\"\n\n"
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java Wed Mar 21 03:35:01 2012
@@ -272,4 +272,9 @@ public class TestCJKAnalyzer extends Bas
public void testRandomStrings() throws Exception {
checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java Wed Mar 21 03:35:01 2012
@@ -209,4 +209,16 @@ public class TestPathHierarchyTokenizer
};
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new PathHierarchyTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+ checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java Wed Mar 21 03:35:01 2012
@@ -170,4 +170,16 @@ public class TestReversePathHierarchyTok
};
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new ReversePathHierarchyTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+ checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Wed Mar 21 03:35:01 2012
@@ -1145,4 +1145,16 @@ public class ShingleFilterTest extends B
};
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
+ }
+ };
+ checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java Wed Mar 21 03:35:01 2012
@@ -429,6 +429,31 @@ public class TestSynonymMapFilter extend
}
}
+ /** simple random test like testRandom2, but for large docs
+ */
+ public void testRandomHuge() throws Exception {
+ final int numIters = atLeast(10);
+ for (int i = 0; i < numIters; i++) {
+ b = new SynonymMap.Builder(random.nextBoolean());
+ final int numEntries = atLeast(10);
+ for (int j = 0; j < numEntries; j++) {
+ add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
+ }
+ final SynonymMap map = b.build();
+ final boolean ignoreCase = random.nextBoolean();
+
+ final Analyzer analyzer = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
+ }
+ };
+
+ checkRandomData(random, analyzer, 100*RANDOM_MULTIPLIER, 8192);
+ }
+ }
+
// LUCENE-3375
public void testVanishingTerms() throws Exception {
String testFile =
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Wed Mar 21 03:35:01 2012
@@ -168,6 +168,11 @@ public class TestThaiAnalyzer extends Ba
checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
}
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+ }
+
// LUCENE-3044
public void testAttributeReuse() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java Wed Mar 21 03:35:01 2012
@@ -186,4 +186,17 @@ public class WikipediaTokenizerTest exte
};
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ Analyzer a = new ReusableAnalyzerBase() {
+
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new WikipediaTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+ checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestAnalyzers.java Wed Mar 21 03:35:01 2012
@@ -228,6 +228,13 @@ public class TestAnalyzers extends BaseT
checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzers */
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+ checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+ checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+ }
}
final class PayloadSetter extends TokenFilter {
Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java Wed Mar 21 03:35:01 2012
@@ -306,4 +306,9 @@ public class TestClassicAnalyzer extends
public void testRandomStrings() throws Exception {
checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java Wed Mar 21 03:35:01 2012
@@ -239,4 +239,9 @@ public class TestStandardAnalyzer extend
public void testRandomStrings() throws Exception {
checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 200*RANDOM_MULTIPLIER, 8192);
+ }
}
Modified: lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java?rev=1303269&r1=1303268&r2=1303269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java Wed Mar 21 03:35:01 2012
@@ -469,4 +469,9 @@ public class TestUAX29URLEmailTokenizer
public void testRandomStrings() throws Exception {
checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
}
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ checkRandomData(random, a, 200*RANDOM_MULTIPLIER, 8192);
+ }
}