You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/02 15:51:22 UTC
svn commit: r1098566 [16/22] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/db/bdb-je/
dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea/lucene/contri...
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java Mon May 2 13:50:57 2011
@@ -43,8 +43,8 @@ public class TestFieldCacheSanityChecker
dirA = newDirectory();
dirB = newDirectory();
- IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
- IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
Modified: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Mon May 2 13:50:57 2011
@@ -191,7 +191,7 @@ public class TestFSTs extends LuceneTest
}
final char[] buffer = new char[end];
for (int i = 0; i < end; i++) {
- buffer[i] = (char) _TestUtil.nextInt(random, 97, 102);
+ buffer[i] = (char) _TestUtil.nextInt(r, 97, 102);
}
return new String(buffer, 0, end);
}
@@ -942,7 +942,7 @@ public class TestFSTs extends LuceneTest
final LineFileDocs docs = new LineFileDocs(random);
final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 100 : 1;
- final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
+ final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = _TestUtil.getTempDir("fstlines");
final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir));
final IndexWriter writer = new IndexWriter(dir, conf);
@@ -1421,4 +1421,73 @@ public class TestFSTs extends LuceneTest
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<Object>());
s.verifyStateAndBelow(fst, arc, 1);
}
+
+ // Make sure raw FST can differentiate between final vs
+ // non-final end nodes
+ public void testNonFinalStopNodes() throws Exception {
+ final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ final Long nothing = outputs.getNoOutput();
+ final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+
+ final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs);
+
+ final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
+
+ // Add final stop node
+ {
+ final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
+ node.isFinal = true;
+ rootNode.addArc('a', node);
+ final Builder.CompiledNode frozen = new Builder.CompiledNode();
+ frozen.address = fst.addNode(node);
+ rootNode.arcs[0].nextFinalOutput = outputs.get(17);
+ rootNode.arcs[0].isFinal = true;
+ rootNode.arcs[0].output = nothing;
+ rootNode.arcs[0].target = frozen;
+ }
+
+ // Add non-final stop node
+ {
+ final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
+ rootNode.addArc('b', node);
+ final Builder.CompiledNode frozen = new Builder.CompiledNode();
+ frozen.address = fst.addNode(node);
+ rootNode.arcs[1].nextFinalOutput = nothing;
+ rootNode.arcs[1].output = outputs.get(42);
+ rootNode.arcs[1].target = frozen;
+ }
+
+ fst.finish(fst.addNode(rootNode));
+
+ checkStopNodes(fst, outputs);
+
+ // Make sure it still works after save/load:
+ Directory dir = newDirectory();
+ IndexOutput out = dir.createOutput("fst");
+ fst.save(out);
+ out.close();
+
+ IndexInput in = dir.openInput("fst");
+ final FST<Long> fst2 = new FST<Long>(in, outputs);
+ checkStopNodes(fst2, outputs);
+ in.close();
+ dir.close();
+ }
+
+ private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
+ final Long nothing = outputs.getNoOutput();
+ FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>());
+ assertEquals(nothing, startArc.output);
+ assertEquals(nothing, startArc.nextFinalOutput);
+
+ FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>());
+ assertEquals('a', arc.label);
+ assertEquals(17, arc.nextFinalOutput.longValue());
+ assertTrue(arc.isFinal());
+
+ arc = fst.readNextArc(arc);
+ assertEquals('b', arc.label);
+ assertFalse(arc.isFinal());
+ assertEquals(42, arc.output.longValue());
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java Mon May 2 13:50:57 2011
@@ -33,7 +33,8 @@ public enum LicenseType {
MPL("Mozilla Public License", false), //NOT SURE on the required notice
PD("Public Domain", false),
//SUNBCLA("Sun Binary Code License Agreement"),
- SUN("Sun Open Source License", false)
+ SUN("Sun Open Source License", false),
+ FAKE("FAKE license - not needed", false)
;
private String display;
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java Mon May 2 13:50:57 2011
@@ -132,7 +132,8 @@ public class GermanStemmer
strip( buffer );
}
// Additional step for irregular plural nouns like "Matrizen -> Matrix".
- if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
+ // NOTE: this length constraint is probably not a great value, it's just to prevent AIOOBE on empty terms
+ if ( buffer.length() > 0 && buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
buffer.setCharAt( buffer.length() - 1, 'x' );
}
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java Mon May 2 13:50:57 2011
@@ -131,5 +131,8 @@ public final class ThaiWordFilter extend
public void reset() throws IOException {
super.reset();
hasMoreTokensInClone = false;
+ clonedToken = null;
+ clonedTermAtt = null;
+ clonedOffsetAtt = null;
}
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java Mon May 2 13:50:57 2011
@@ -98,4 +98,9 @@ public class TestArabicAnalyzer extends
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ArabicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java Mon May 2 13:50:57 2011
@@ -75,4 +75,9 @@ public class TestBulgarianAnalyzer exten
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BulgarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Mon May 2 13:50:57 2011
@@ -157,4 +157,8 @@ public class TestBrazilianStemmer extend
checkOneTermReuse(a, input, expected);
}
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BrazilianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
\ No newline at end of file
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestCatalanAnalyzer extends
checkOneTermReuse(a, "llengües", "llengües");
checkOneTermReuse(a, "llengua", "llengu");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CatalanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java Mon May 2 13:50:57 2011
@@ -270,4 +270,9 @@ public class TestCJKTokenizer extends Ba
newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE),
newToken("ãã", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Mon May 2 13:50:57 2011
@@ -210,6 +210,13 @@ public class TestAnalyzers extends BaseT
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
"\ud801\udc1ctest" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
final class PayloadSetter extends TokenFilter {
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java Mon May 2 13:50:57 2011
@@ -309,4 +309,9 @@ public class TestClassicAnalyzer extends
dir.close();
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java Mon May 2 13:50:57 2011
@@ -102,4 +102,9 @@ public class TestKeywordAnalyzer extends
assertEquals(0, offsetAtt.startOffset());
assertEquals(4, offsetAtt.endOffset());
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new KeywordAnalyzer(), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Mon May 2 13:50:57 2011
@@ -3,6 +3,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
@@ -219,4 +220,9 @@ public class TestStandardAnalyzer extend
new String[] { "仮", "名", "遣", "い", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Mon May 2 13:50:57 2011
@@ -418,4 +418,9 @@ public class TestUAX29URLEmailTokenizer
new String[] { "仮", "名", "遣", "い", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java Mon May 2 13:50:57 2011
@@ -67,4 +67,9 @@ public class TestCzechAnalyzer extends B
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CzechAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestDanishAnalyzer extends
checkOneTermReuse(a, "undersøgelse", "undersøgelse");
checkOneTermReuse(a, "undersøg", "undersøg");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new DanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java Mon May 2 13:50:57 2011
@@ -63,4 +63,9 @@ public class TestGermanAnalyzer extends
checkOneTermReuse(a, "Schaltflächen", "schaltflach");
checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GermanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java Mon May 2 13:50:57 2011
@@ -45,4 +45,9 @@ public class TestGermanLightStemFilter e
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java Mon May 2 13:50:57 2011
@@ -57,4 +57,9 @@ public class TestGermanMinimalStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java Mon May 2 13:50:57 2011
@@ -36,20 +36,30 @@ import static org.apache.lucene.analysis
*
*/
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t,
+ new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
+ }
+ };
- public void testStemming() throws Exception {
- Analyzer analyzer = new ReusableAnalyzerBase() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer t = new KeywordTokenizer(reader);
- return new TokenStreamComponents(t,
- new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
- }
- };
-
+ public void testStemming() throws Exception {
InputStream vocOut = getClass().getResourceAsStream("data.txt");
assertVocabulary(analyzer, vocOut);
vocOut.close();
}
+
+ // LUCENE-3043: we use keywordtokenizer in this test,
+ // so ensure the stemmer does not crash on zero-length strings.
+ public void testEmpty() throws Exception {
+ assertAnalyzesTo(analyzer, "", new String[] { "" });
+ }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java Mon May 2 13:50:57 2011
@@ -87,4 +87,9 @@ public class GreekAnalyzerTest extends B
assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
}
- }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GreekAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
+}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java Mon May 2 13:50:57 2011
@@ -52,4 +52,9 @@ public class TestEnglishAnalyzer extends
checkOneTermReuse(a, "books", "books");
checkOneTermReuse(a, "book", "book");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new EnglishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java Mon May 2 13:50:57 2011
@@ -51,4 +51,9 @@ public class TestEnglishMinimalStemFilte
checkOneTerm(analyzer, "congress", "congress");
checkOneTerm(analyzer, "serious", "serious");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java Mon May 2 13:50:57 2011
@@ -36,21 +36,21 @@ import static org.apache.lucene.analysis
/**
* Test the PorterStemFilter with Martin Porter's test data.
*/
-public class TestPorterStemFilter extends BaseTokenStreamTestCase {
+public class TestPorterStemFilter extends BaseTokenStreamTestCase {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t, new PorterStemFilter(t));
+ }
+ };
+
/**
* Run the stemmer against all strings in voc.txt
* The output should be the same as the string in output.txt
*/
public void testPorterStemFilter() throws Exception {
- Analyzer a = new ReusableAnalyzerBase() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer t = new KeywordTokenizer(reader);
- return new TokenStreamComponents(t, new PorterStemFilter(t));
- }
- };
-
assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
}
@@ -61,4 +61,9 @@ public class TestPorterStemFilter extend
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestSpanishAnalyzer extends
checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chicano");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new SpanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java Mon May 2 13:50:57 2011
@@ -45,4 +45,9 @@ public class TestSpanishLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestBasqueAnalyzer extends
checkOneTermReuse(a, "zaldiak", "zaldiak");
checkOneTermReuse(a, "mendiari", "mendi");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BasqueAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java Mon May 2 13:50:57 2011
@@ -219,4 +219,9 @@ public class TestPersianAnalyzer extends
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new PersianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestFinnishAnalyzer extends
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new FinnishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java Mon May 2 13:50:57 2011
@@ -45,4 +45,9 @@ public class TestFinnishLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Mon May 2 13:50:57 2011
@@ -260,4 +260,9 @@ public class TestFrenchAnalyzer extends
FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
assertAnalyzesTo(a, "Votre", new String[] { });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new FrenchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java Mon May 2 13:50:57 2011
@@ -159,4 +159,9 @@ public class TestFrenchLightStemFilter e
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java Mon May 2 13:50:57 2011
@@ -59,4 +59,9 @@ public class TestFrenchMinimalStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestGalicianAnalyzer extend
checkOneTermReuse(a, "correspondente", "correspondente");
checkOneTermReuse(a, "corresponderá", "correspond");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GalicianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java Mon May 2 13:50:57 2011
@@ -47,4 +47,9 @@ public class TestHindiAnalyzer extends B
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new HindiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestHungarianAnalyzer exten
checkOneTermReuse(a, "babakocsi", "babakocsi");
checkOneTermReuse(a, "babakocsijáért", "babakocs");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new HungarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestArmenianAnalyzer extend
checkOneTermReuse(a, "արծիվներ", "արծիվներ");
checkOneTermReuse(a, "արծիվ", "արծ");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ArmenianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestIndonesianAnalyzer exte
checkOneTermReuse(a, "peledakan", "peledakan");
checkOneTermReuse(a, "pembunuhan", "bunuh");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new IndonesianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestItalianAnalyzer extends
checkOneTermReuse(a, "abbandonata", "abbandonata");
checkOneTermReuse(a, "abbandonati", "abbandon");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ItalianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java Mon May 2 13:50:57 2011
@@ -45,4 +45,9 @@ public class TestItalianLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java Mon May 2 13:50:57 2011
@@ -51,7 +51,7 @@ public class TestLimitTokenCountAnalyzer
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(), 100000)));
+ TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(random), 100000)));
Document doc = new Document();
StringBuilder b = new StringBuilder();
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Mon May 2 13:50:57 2011
@@ -185,4 +185,9 @@ public class TestDutchStemmer extends Ba
checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected);
}
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new DutchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
+
}
\ No newline at end of file
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestNorwegianAnalyzer exten
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new NorwegianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestPortugueseAnalyzer exte
checkOneTermReuse(a, "quilométricas", "quilométricas");
checkOneTermReuse(a, "quilométricos", "quilométr");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new PortugueseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java Mon May 2 13:50:57 2011
@@ -92,4 +92,9 @@ public class TestPortugueseLightStemFilt
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), "ptlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java Mon May 2 13:50:57 2011
@@ -66,4 +66,9 @@ public class TestPortugueseMinimalStemFi
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), "ptminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java Mon May 2 13:50:57 2011
@@ -66,4 +66,9 @@ public class TestPortugueseStemFilter ex
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestRomanianAnalyzer extend
checkOneTermReuse(a, "absenţa", "absenţa");
checkOneTermReuse(a, "absenţi", "absenţ");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new RomanianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Mon May 2 13:50:57 2011
@@ -64,4 +64,9 @@ public class TestRussianAnalyzer extends
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new RussianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java Mon May 2 13:50:57 2011
@@ -45,4 +45,9 @@ public class TestRussianLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), "rulight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java Mon May 2 13:50:57 2011
@@ -23,6 +23,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.hu.HungarianAnalyzer;
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
@@ -50,4 +51,9 @@ public class TestSwedishAnalyzer extends
checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new SwedishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java Mon May 2 13:50:57 2011
@@ -45,4 +45,9 @@ public class TestSwedishLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Mon May 2 13:50:57 2011
@@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th;
* limitations under the License.
*/
+import java.io.StringReader;
+
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.util.Version;
/**
@@ -142,5 +146,23 @@ public class TestThaiAnalyzer extends Ba
analyzer,
"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
- }
+ }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
+
+ // LUCENE-3044
+ public void testAttributeReuse() throws Exception {
+ assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
+ ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
+ // just consume
+ TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+ assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+ // this consumer adds flagsAtt, which this analyzer does not use.
+ ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+ ts.addAttribute(FlagsAttribute.class);
+ assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestTurkishAnalyzer extends
checkOneTermReuse(a, "ağacı", "ağacı");
checkOneTermReuse(a, "ağaç", "ağaç");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new TurkishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java Mon May 2 13:50:57 2011
@@ -186,7 +186,7 @@ public abstract class CollationTestBase
String dkResult) throws Exception {
RAMDirectory indexStore = new RAMDirectory();
IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
+ TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
// document data:
// the tracer field is used to determine which document was hit
Modified: lucene/dev/branches/docvalues/modules/analysis/icu/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/icu/build.xml?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/icu/build.xml (original)
+++ lucene/dev/branches/docvalues/modules/analysis/icu/build.xml Mon May 2 13:50:57 2011
@@ -137,4 +137,20 @@ are part of the ICU4C package. See http:
<m2-deploy-with-pom-template pom.xml="lib/lucene-icu4j-pom.xml.template"
jar.file="lib/icu4j-4_6.jar" />
</target>
+
+ <target name="javadocs" depends="compile-core">
+ <sequential>
+ <mkdir dir="${javadoc.dir}/contrib-${name}"/>
+ <invoke-javadoc
+ destdir="${javadoc.dir}/contrib-${name}"
+ title="${Name} ${version} contrib-${name} API">
+ <sources>
+ <link href="../contrib-analyzers-common"/>
+ <link href=""/>
+ <packageset dir="${src.dir}"/>
+ </sources>
+ </invoke-javadoc>
+ <jarify basedir="${javadoc.dir}/contrib-${name}" destfile="${build.dir}/${final.name}-javadoc.jar"/>
+ </sequential>
+ </target>
</project>
Modified: lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java Mon May 2 13:50:57 2011
@@ -29,15 +29,14 @@ import org.apache.lucene.analysis.core.W
* Tests ICUFoldingFilter
*/
public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
+ Analyzer a = new Analyzer() {
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new ICUFoldingFilter(
+ new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+ }
+ };
public void testDefaults() throws IOException {
- Analyzer a = new Analyzer() {
- @Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new ICUFoldingFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
- }
- };
-
// case folding
assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
@@ -76,4 +75,9 @@ public class TestICUFoldingFilter extend
// handling of decomposed combining-dot-above
assertAnalyzesTo(a, "eli\u0307f", new String[] { "elif" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java Mon May 2 13:50:57 2011
@@ -31,16 +31,15 @@ import com.ibm.icu.text.Normalizer2;
* Tests the ICUNormalizer2Filter
*/
public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
+ Analyzer a = new Analyzer() {
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new ICUNormalizer2Filter(
+ new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+ }
+ };
public void testDefaults() throws IOException {
- Analyzer a = new Analyzer() {
- @Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new ICUNormalizer2Filter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
- }
- };
-
// case folding
assertAnalyzesTo(a, "This is a test", new String[] { "this", "is", "a", "test" });
@@ -75,4 +74,9 @@ public class TestICUNormalizer2Filter ex
// decompose EAcute into E + combining Acute
assertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java Mon May 2 13:50:57 2011
@@ -18,10 +18,15 @@ package org.apache.lucene.analysis.icu;
*/
import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import com.ibm.icu.text.Transliterator;
@@ -83,4 +88,17 @@ public class TestICUTransformFilter exte
TokenStream ts = new ICUTransformFilter(new KeywordTokenizer((new StringReader(input))), transform);
assertTokenStreamContents(ts, new String[] { expected });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ final Transliterator transform = Transliterator.getInstance("Any-Latin");
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, transform));
+ }
+ };
+ checkRandomData(random, a, 1000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java Mon May 2 13:50:57 2011
@@ -232,4 +232,9 @@ public class TestICUTokenizer extends Ba
new String[] { "仮", "名", "遣", "い", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java Mon May 2 13:50:57 2011
@@ -75,7 +75,7 @@ class SegGraph {
List<SegToken> result = new ArrayList<SegToken>();
int s = -1, count = 0, size = tokenListTable.size();
List<SegToken> tokenList;
- short index = 0;
+ int index = 0;
while (count < size) {
if (isStartExist(s)) {
tokenList = tokenListTable.get(s);
Modified: lucene/dev/branches/docvalues/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Mon May 2 13:50:57 2011
@@ -17,8 +17,11 @@
package org.apache.lucene.analysis.cn.smart;
+import java.io.StringReader;
+
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
@@ -166,4 +169,35 @@ public class TestSmartChineseAnalyzer ex
new int[] { 0, 1, 3, 4, 6, 7 },
new int[] { 1, 3, 4, 6, 7, 9 });
}
+
+ // LUCENE-3026
+ public void testLargeDocument() throws Exception {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < 5000; i++) {
+ sb.append("我购买了道具和服装。");
+ }
+ Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
+ TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+ stream.reset();
+ while (stream.incrementToken()) {
+ }
+ }
+
+ // LUCENE-3026
+ public void testLargeSentence() throws Exception {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < 5000; i++) {
+ sb.append("我购买了道具和服装");
+ }
+ Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
+ TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+ stream.reset();
+ while (stream.incrementToken()) {
+ }
+ }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java (original)
+++ lucene/dev/branches/docvalues/modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java Mon May 2 13:50:57 2011
@@ -50,4 +50,9 @@ public class TestPolishAnalyzer extends
checkOneTermReuse(a, "studenta", "studenta");
checkOneTermReuse(a, "studenci", "student");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new PolishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/docvalues/modules/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/modules/benchmark/CHANGES.txt?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/modules/benchmark/CHANGES.txt (original)
+++ lucene/dev/branches/docvalues/modules/benchmark/CHANGES.txt Mon May 2 13:50:57 2011
@@ -2,6 +2,22 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
+03/31/2011
+ Updated ReadTask to the new method for obtaining a top-level deleted docs
+ bitset. Also checking the bitset for null, when there are no deleted docs.
+ (Steve Rowe, Mike McCandless)
+
+ Updated NewAnalyzerTask and NewShingleAnalyzerTask to handle analyzers
+ in the new org.apache.lucene.analysis.core package (KeywordAnalyzer,
+ SimpleAnalyzer, etc.) (Steve Rowe, Robert Muir)
+
+ Updated ReadTokensTask to convert tokens to their indexed forms
+ (char[]->byte[]), just as the indexer does. This allows measurement
+ of the conversion process, which is important for analysis components
+ that customize it, e.g. (ICU)CollationKeyFilter. As a result,
+ benchmarks that incorporate this task will no longer be directly
+ comparable between 3.X and 4.0. (Robert Muir, Steve Rowe)
+
03/24/2011
LUCENE-2977: WriteLineDocTask now automatically detects how to write -
GZip or BZip2 or Plain-text - according to the output file extension.