You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/21 02:52:22 UTC
svn commit: r1303253 - in /lucene/dev/trunk:
lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java
modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
Author: rmuir
Date: Wed Mar 21 01:52:22 2012
New Revision: 1303253
URL: http://svn.apache.org/viewvc?rev=1303253&view=rev
Log:
LUCENE-3896: beef up TestDuelingAnalyzers for larger documents
Modified:
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java?rev=1303253&r1=1303252&r2=1303253&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java Wed Mar 21 01:52:22 2012
@@ -51,6 +51,7 @@ public final class MockAnalyzer extends
private final Random random;
private Map<String,Integer> previousMappings = new HashMap<String,Integer>();
private boolean enableChecks = true;
+ private int maxTokenLength = MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH;
/**
* Creates a new MockAnalyzer.
@@ -90,7 +91,7 @@ public final class MockAnalyzer extends
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
- MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
+ MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase, maxTokenLength);
tokenizer.setEnableChecks(enableChecks);
TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
@@ -137,4 +138,11 @@ public final class MockAnalyzer extends
public void setEnableChecks(boolean enableChecks) {
this.enableChecks = enableChecks;
}
+
+ /**
+ * Sets the maxTokenLength passed to MockTokenizer
+ */
+ public void setMaxTokenLength(int length) {
+ this.maxTokenLength = length;
+ }
}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java?rev=1303253&r1=1303252&r2=1303253&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java Wed Mar 21 01:52:22 2012
@@ -22,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockReaderWrapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -73,8 +74,63 @@ public class TestDuelingAnalyzers extend
};
for (int i = 0; i < 10000; i++) {
String s = _TestUtil.randomSimpleString(random);
- assertEquals(s, left.tokenStream("foo", new StringReader(s)),
- right.tokenStream("foo", new StringReader(s)));
+ assertEquals(s, left.tokenStream("foo", newStringReader(s)),
+ right.tokenStream("foo", newStringReader(s)));
+ }
+ }
+
+ // not so useful since it's all one token?!
+ public void testLetterAsciiHuge() throws Exception {
+ int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2
+ MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false);
+ left.setMaxTokenLength(255); // match CharTokenizer's max token length
+ Analyzer right = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+ int numIterations = atLeast(100);
+ for (int i = 0; i < numIterations; i++) {
+ String s = _TestUtil.randomSimpleString(random, maxLength);
+ assertEquals(s, left.tokenStream("foo", newStringReader(s)),
+ right.tokenStream("foo", newStringReader(s)));
+ }
+ }
+
+ public void testLetterHtmlish() throws Exception {
+ Analyzer left = new MockAnalyzer(random, jvmLetter, false);
+ Analyzer right = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+ for (int i = 0; i < 10000; i++) {
+ String s = _TestUtil.randomHtmlishString(random, 20);
+ assertEquals(s, left.tokenStream("foo", newStringReader(s)),
+ right.tokenStream("foo", newStringReader(s)));
+ }
+ }
+
+ public void testLetterHtmlishHuge() throws Exception {
+ int maxLength = 2048; // this is number of elements, not chars!
+ MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false);
+ left.setMaxTokenLength(255); // match CharTokenizer's max token length
+ Analyzer right = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+ int numIterations = atLeast(100);
+ for (int i = 0; i < numIterations; i++) {
+ String s = _TestUtil.randomHtmlishString(random, maxLength);
+ assertEquals(s, left.tokenStream("foo", newStringReader(s)),
+ right.tokenStream("foo", newStringReader(s)));
}
}
@@ -89,8 +145,27 @@ public class TestDuelingAnalyzers extend
};
for (int i = 0; i < 10000; i++) {
String s = _TestUtil.randomUnicodeString(random);
- assertEquals(s, left.tokenStream("foo", new StringReader(s)),
- right.tokenStream("foo", new StringReader(s)));
+ assertEquals(s, left.tokenStream("foo", newStringReader(s)),
+ right.tokenStream("foo", newStringReader(s)));
+ }
+ }
+
+ public void testLetterUnicodeHuge() throws Exception {
+ int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2
+ MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false);
+ left.setMaxTokenLength(255); // match CharTokenizer's max token length
+ Analyzer right = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+ int numIterations = atLeast(100);
+ for (int i = 0; i < numIterations; i++) {
+ String s = _TestUtil.randomUnicodeString(random, maxLength);
+ assertEquals(s, left.tokenStream("foo", newStringReader(s)),
+ right.tokenStream("foo", newStringReader(s)));
}
}
@@ -120,4 +195,13 @@ public class TestDuelingAnalyzers extend
left.close();
right.close();
}
+
+ // TODO: maybe push this out to _TestUtil or LuceneTestCase and always use it instead?
+ private static Reader newStringReader(String s) {
+ Reader r = new StringReader(s);
+ if (random.nextBoolean()) {
+ r = new MockReaderWrapper(random, r);
+ }
+ return r;
+ }
}