You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/09 17:24:23 UTC
svn commit: r1101062 [16/21] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/
dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/db/bdb-je/
dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea...
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java Mon May 9 15:24:04 2011
@@ -24,6 +24,7 @@ import java.io.ObjectOutputStream;
import java.io.ByteArrayOutputStream;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -49,11 +50,11 @@ public class TestRAMDirectory extends Lu
@Override
public void setUp() throws Exception {
super.setUp();
- indexDir = new File(TEMP_DIR, "RAMDirIndex");
+ indexDir = _TestUtil.getTempDir("RAMDirIndex");
Directory dir = newFSDirectory(indexDir);
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
// add some documents
Document doc = null;
for (int i = 0; i < docsToAdd; i++) {
@@ -105,7 +106,7 @@ public class TestRAMDirectory extends Lu
dir.close();
final IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
writer.optimize();
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestWindowsMMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestWindowsMMap.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestWindowsMMap.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestWindowsMMap.java Mon May 9 15:24:04 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.store;
import java.io.File;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@@ -59,7 +60,7 @@ public class TestWindowsMMap extends Luc
}
private final static String storePathname =
- new File(TEMP_DIR,"testLuceneMmap").getAbsolutePath();
+ _TestUtil.getTempDir("testLuceneMmap").getAbsolutePath();
public void testMmapIndex() throws Exception {
// sometimes the directory is not cleaned by rmDir, because on Windows it
@@ -71,7 +72,7 @@ public class TestWindowsMMap extends Luc
// plan to add a set of useful stopwords, consider changing some of the
// interior filters.
- MockAnalyzer analyzer = new MockAnalyzer();
+ MockAnalyzer analyzer = new MockAnalyzer(random);
// TODO: something about lock timeouts and leftover locks.
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, analyzer)
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java Mon May 9 15:24:04 2011
@@ -144,6 +144,24 @@ public class TestArrayUtil extends Lucen
}
}
+ private Integer[] createSparseRandomArray(int maxSize) {
+ final Integer[] a = new Integer[random.nextInt(maxSize) + 1];
+ for (int i = 0; i < a.length; i++) {
+ a[i] = Integer.valueOf(random.nextInt(2));
+ }
+ return a;
+ }
+
+ // This is a test for LUCENE-3054 (without the merge sort fallback, quickSort fails with a stack overflow in most cases)
+ public void testQuickToMergeSortFallback() {
+ for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
+ Integer[] a1 = createSparseRandomArray(40000), a2 = a1.clone();
+ ArrayUtil.quickSort(a1);
+ Arrays.sort(a2);
+ assertArrayEquals(a2, a1);
+ }
+ }
+
public void testMergeSort() {
for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java Mon May 9 15:24:04 2011
@@ -147,4 +147,14 @@ public class TestAttributeSource extends
fail("Should throw IllegalArgumentException");
} catch (IllegalArgumentException iae) {}
}
+
+ public void testLUCENE_3042() throws Exception {
+ final AttributeSource src1 = new AttributeSource();
+ src1.addAttribute(CharTermAttribute.class).append("foo");
+ int hash1 = src1.hashCode(); // this triggers a cached state
+ final AttributeSource src2 = new AttributeSource(src1);
+ src2.addAttribute(TypeAttribute.class).setType("bar");
+ assertTrue("The hashCode is identical, so the captured state was preserved.", hash1 != src1.hashCode());
+ assertEquals(src2.hashCode(), src1.hashCode());
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java Mon May 9 15:24:04 2011
@@ -135,6 +135,7 @@ public class TestBytesRefHash extends Lu
public void testCompact() {
BytesRef ref = new BytesRef();
for (int j = 0; j < 2 * RANDOM_MULTIPLIER; j++) {
+ int numEntries = 0;
final int size = 797;
BitSet bits = new BitSet(size);
for (int i = 0; i < size; i++) {
@@ -143,13 +144,21 @@ public class TestBytesRefHash extends Lu
str = _TestUtil.randomRealisticUnicodeString(random, 1000);
} while (str.length() == 0);
ref.copy(str);
- bits.set(hash.add(ref));
-
+ final int key = hash.add(ref);
+ if (key < 0) {
+ assertTrue(bits.get((-key)-1));
+ } else {
+ assertFalse(bits.get(key));
+ bits.set(key);
+ numEntries++;
+ }
}
assertEquals(hash.size(), bits.cardinality());
+ assertEquals(numEntries, bits.cardinality());
+ assertEquals(numEntries, hash.size());
int[] compact = hash.compact();
- assertTrue(size < compact.length);
- for (int i = 0; i < size; i++) {
+ assertTrue(numEntries < compact.length);
+ for (int i = 0; i < numEntries; i++) {
bits.set(compact[i], false);
}
assertEquals(0, bits.cardinality());
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java Mon May 9 15:24:04 2011
@@ -43,8 +43,8 @@ public class TestFieldCacheSanityChecker
dirA = newDirectory();
dirB = newDirectory();
- IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
- IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Mon May 9 15:24:04 2011
@@ -942,7 +942,7 @@ public class TestFSTs extends LuceneTest
final LineFileDocs docs = new LineFileDocs(random);
final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 100 : 1;
- final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
+ final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = _TestUtil.getTempDir("fstlines");
final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir));
final IndexWriter writer = new IndexWriter(dir, conf);
@@ -1421,4 +1421,73 @@ public class TestFSTs extends LuceneTest
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<Object>());
s.verifyStateAndBelow(fst, arc, 1);
}
+
+ // Make sure raw FST can differentiate between final vs
+ // non-final end nodes
+ public void testNonFinalStopNodes() throws Exception {
+ final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ final Long nothing = outputs.getNoOutput();
+ final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+
+ final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs);
+
+ final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
+
+ // Add final stop node
+ {
+ final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
+ node.isFinal = true;
+ rootNode.addArc('a', node);
+ final Builder.CompiledNode frozen = new Builder.CompiledNode();
+ frozen.address = fst.addNode(node);
+ rootNode.arcs[0].nextFinalOutput = outputs.get(17);
+ rootNode.arcs[0].isFinal = true;
+ rootNode.arcs[0].output = nothing;
+ rootNode.arcs[0].target = frozen;
+ }
+
+ // Add non-final stop node
+ {
+ final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
+ rootNode.addArc('b', node);
+ final Builder.CompiledNode frozen = new Builder.CompiledNode();
+ frozen.address = fst.addNode(node);
+ rootNode.arcs[1].nextFinalOutput = nothing;
+ rootNode.arcs[1].output = outputs.get(42);
+ rootNode.arcs[1].target = frozen;
+ }
+
+ fst.finish(fst.addNode(rootNode));
+
+ checkStopNodes(fst, outputs);
+
+ // Make sure it still works after save/load:
+ Directory dir = newDirectory();
+ IndexOutput out = dir.createOutput("fst");
+ fst.save(out);
+ out.close();
+
+ IndexInput in = dir.openInput("fst");
+ final FST<Long> fst2 = new FST<Long>(in, outputs);
+ checkStopNodes(fst2, outputs);
+ in.close();
+ dir.close();
+ }
+
+ private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
+ final Long nothing = outputs.getNoOutput();
+ FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>());
+ assertEquals(nothing, startArc.output);
+ assertEquals(nothing, startArc.nextFinalOutput);
+
+ FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>());
+ assertEquals('a', arc.label);
+ assertEquals(17, arc.nextFinalOutput.longValue());
+ assertTrue(arc.isFinal());
+
+ arc = fst.readNextArc(arc);
+ assertEquals('b', arc.label);
+ assertFalse(arc.isFinal());
+ assertEquals(42, arc.output.longValue());
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java Mon May 9 15:24:04 2011
@@ -33,7 +33,8 @@ public enum LicenseType {
MPL("Mozilla Public License", false), //NOT SURE on the required notice
PD("Public Domain", false),
//SUNBCLA("Sun Binary Code License Agreement"),
- SUN("Sun Open Source License", false)
+ SUN("Sun Open Source License", false),
+ FAKE("FAKE license - not needed", false)
;
private String display;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt Mon May 9 15:24:04 2011
@@ -83,6 +83,8 @@ New Features
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+ - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
+ - o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils
* SOLR-1057: Add PathHierarchyTokenizer that represents file path hierarchies as synonyms of
/something, /something/something, /something/something/else. (Ryan McKinley, Koji Sekiguchi)
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java Mon May 9 15:24:04 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ar;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // javadoc @link
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java Mon May 9 15:24:04 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java Mon May 9 15:24:04 2011
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java Mon May 9 15:24:04 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java Mon May 9 15:24:04 2011
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java Mon May 9 15:24:04 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java Mon May 9 15:24:04 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java Mon May 9 15:24:04 2011
@@ -132,7 +132,8 @@ public class GermanStemmer
strip( buffer );
}
// Additional step for irregular plural nouns like "Matrizen -> Matrix".
- if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
+ // NOTE: this length constraint is probably not a great value; it's just to prevent AIOOBE on empty terms
+ if ( buffer.length() > 0 && buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
buffer.setCharAt( buffer.length() - 1, 'x' );
}
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java Mon May 9 15:24:04 2011
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java Mon May 9 15:24:04 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.in;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java Mon May 9 15:24:04 2011
@@ -25,57 +25,71 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
- *
+ *
* Take something like:
- *
+ *
* <pre>
- * /soemthing/something/else
+ * /something/something/else
* </pre>
- *
+ *
* and make:
- *
+ *
* <pre>
- * /soemthing
- * /soemthing/something
- * /soemthing/something/else
+ * /something
+ * /something/something
+ * /something/something/else
* </pre>
- *
*/
public class PathHierarchyTokenizer extends Tokenizer {
public PathHierarchyTokenizer(Reader input) {
- this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER);
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
}
public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
- this(input, bufferSize, delimiter, delimiter);
+ this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
}
public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
- this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement);
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
}
- public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
+ public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
super(input);
termAtt.resizeBuffer(bufferSize);
+
this.delimiter = delimiter;
this.replacement = replacement;
- endDelimiter = false;
+ this.skip = skip;
resultToken = new StringBuilder(bufferSize);
}
-
+
private static final int DEFAULT_BUFFER_SIZE = 1024;
public static final char DEFAULT_DELIMITER = '/';
+ public static final int DEFAULT_SKIP = 0;
+
private final char delimiter;
private final char replacement;
-
+ private final int skip;
+
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
+ private int startPosition = 0;
private int finalOffset = 0;
- private boolean endDelimiter;
+ private int skipped = 0;
+ private boolean endDelimiter = false;
private StringBuilder resultToken;
+
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
@@ -97,43 +111,69 @@ public class PathHierarchyTokenizer exte
while (true) {
int c = input.read();
- if( c < 0 ) {
- length += resultToken.length();
- termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
- if( added ){
- resultToken.setLength(0);
- resultToken.append(termAtt.buffer(), 0, length);
- }
- return added;
- }
- added = true;
- if( c == delimiter ) {
- if( length > 0 ){
- endDelimiter = true;
- break;
+ if( c < 0 ){
+ if( skipped > skip ) {
+ length += resultToken.length();
+ termAtt.setLength(length);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
+ if( added ){
+ resultToken.setLength(0);
+ resultToken.append(termAtt.buffer(), 0, length);
+ }
+ return added;
}
else{
- termAtt.append(replacement);
+ finalOffset = correctOffset(startPosition + length);
+ return false;
+ }
+ }
+ if( !added ){
+ added = true;
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(c == delimiter ? replacement : (char)c);
length++;
}
+ else {
+ startPosition++;
+ }
}
else {
- termAtt.append((char)c);
- length++;
+ if( c == delimiter ){
+ if( skipped > skip ){
+ endDelimiter = true;
+ break;
+ }
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(replacement);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
+ else {
+ if( skipped > skip ){
+ termAtt.append((char)c);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
}
}
-
length += resultToken.length();
termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
resultToken.setLength(0);
resultToken.append(termAtt.buffer(), 0, length);
return true;
}
-
+
@Override
public final void end() {
// set final offset
@@ -146,5 +186,6 @@ public class PathHierarchyTokenizer exte
resultToken.setLength(0);
finalOffset = 0;
endDelimiter = false;
+ skipped = 0;
}
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java Mon May 9 15:24:04 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ru;
*/
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer; // for javadocs
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.util.AttributeSource;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java Mon May 9 15:24:04 2011
@@ -131,5 +131,8 @@ public final class ThaiWordFilter extend
public void reset() throws IOException {
super.reset();
hasMoreTokensInClone = false;
+ clonedToken = null;
+ clonedTermAtt = null;
+ clonedOffsetAtt = null;
}
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java Mon May 9 15:24:04 2011
@@ -24,7 +24,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java Mon May 9 15:24:04 2011
@@ -98,4 +98,9 @@ public class TestArabicAnalyzer extends
assertAnalyzesTo(a, "ÙبÙرة the quick ساÙدÙات", new String[] { "ÙبÙر","the", "quick", "ساÙد" });
assertAnalyzesToReuse(a, "ÙبÙرة the quick ساÙدÙات", new String[] { "ÙبÙر","the", "quick", "ساÙد" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ArabicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java Mon May 9 15:24:04 2011
@@ -75,4 +75,9 @@ public class TestBulgarianAnalyzer exten
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "ÑÑÑоевеÑе ÑÑÑоеве", new String[] { "ÑÑÑой", "ÑÑÑоеве" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BulgarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Mon May 9 15:24:04 2011
@@ -157,4 +157,8 @@ public class TestBrazilianStemmer extend
checkOneTermReuse(a, input, expected);
}
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BrazilianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
\ No newline at end of file
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestCatalanAnalyzer extends
checkOneTermReuse(a, "llengües", "llengües");
checkOneTermReuse(a, "llengua", "llengu");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CatalanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java Mon May 9 15:24:04 2011
@@ -270,4 +270,9 @@ public class TestCJKTokenizer extends Ba
newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE),
newToken("ãã", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Mon May 9 15:24:04 2011
@@ -210,6 +210,13 @@ public class TestAnalyzers extends BaseT
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
"\ud801\udc1ctest" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
final class PayloadSetter extends TokenFilter {
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java Mon May 9 15:24:04 2011
@@ -309,4 +309,9 @@ public class TestClassicAnalyzer extends
dir.close();
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java Mon May 9 15:24:04 2011
@@ -102,4 +102,9 @@ public class TestKeywordAnalyzer extends
assertEquals(0, offsetAtt.startOffset());
assertEquals(4, offsetAtt.endOffset());
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new KeywordAnalyzer(), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Mon May 9 15:24:04 2011
@@ -3,6 +3,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
@@ -219,4 +220,9 @@ public class TestStandardAnalyzer extend
new String[] { "ä»®", "å", "é£", "ã", "ã«ã¿ã«ã" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Mon May 9 15:24:04 2011
@@ -418,4 +418,9 @@ public class TestUAX29URLEmailTokenizer
new String[] { "ä»®", "å", "é£", "ã", "ã«ã¿ã«ã" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java Mon May 9 15:24:04 2011
@@ -67,4 +67,9 @@ public class TestCzechAnalyzer extends B
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CzechAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestDanishAnalyzer extends
checkOneTermReuse(a, "undersøgelse", "undersøgelse");
checkOneTermReuse(a, "undersøg", "undersøg");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new DanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java Mon May 9 15:24:04 2011
@@ -63,4 +63,9 @@ public class TestGermanAnalyzer extends
checkOneTermReuse(a, "Schaltflächen", "schaltflach");
checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GermanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java Mon May 9 15:24:04 2011
@@ -45,4 +45,9 @@ public class TestGermanLightStemFilter e
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java Mon May 9 15:24:04 2011
@@ -57,4 +57,9 @@ public class TestGermanMinimalStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java Mon May 9 15:24:04 2011
@@ -36,20 +36,30 @@ import static org.apache.lucene.analysis
*
*/
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t,
+ new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
+ }
+ };
- public void testStemming() throws Exception {
- Analyzer analyzer = new ReusableAnalyzerBase() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer t = new KeywordTokenizer(reader);
- return new TokenStreamComponents(t,
- new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
- }
- };
-
+ public void testStemming() throws Exception {
InputStream vocOut = getClass().getResourceAsStream("data.txt");
assertVocabulary(analyzer, vocOut);
vocOut.close();
}
+
+ // LUCENE-3043: we use keywordtokenizer in this test,
+ // so ensure the stemmer does not crash on zero-length strings.
+ public void testEmpty() throws Exception {
+ assertAnalyzesTo(analyzer, "", new String[] { "" });
+ }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java Mon May 9 15:24:04 2011
@@ -87,4 +87,9 @@ public class GreekAnalyzerTest extends B
assertAnalyzesToReuse(a, "ΠΡÎΫΠÎÎÎΣÎÎΣ ÎÏογοÏ, ο μεÏÏÏÏ ÎºÎ±Î¹ οι άλλοι",
new String[] { "ÏÏοÏ
ÏοθεÏ", "αÏογ", "μεÏÏ", "αλλ" });
}
- }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GreekAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
+}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java Mon May 9 15:24:04 2011
@@ -52,4 +52,9 @@ public class TestEnglishAnalyzer extends
checkOneTermReuse(a, "books", "books");
checkOneTermReuse(a, "book", "book");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new EnglishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java Mon May 9 15:24:04 2011
@@ -51,4 +51,9 @@ public class TestEnglishMinimalStemFilte
checkOneTerm(analyzer, "congress", "congress");
checkOneTerm(analyzer, "serious", "serious");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java Mon May 9 15:24:04 2011
@@ -36,21 +36,21 @@ import static org.apache.lucene.analysis
/**
* Test the PorterStemFilter with Martin Porter's test data.
*/
-public class TestPorterStemFilter extends BaseTokenStreamTestCase {
+public class TestPorterStemFilter extends BaseTokenStreamTestCase {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t, new PorterStemFilter(t));
+ }
+ };
+
/**
* Run the stemmer against all strings in voc.txt
* The output should be the same as the string in output.txt
*/
public void testPorterStemFilter() throws Exception {
- Analyzer a = new ReusableAnalyzerBase() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer t = new KeywordTokenizer(reader);
- return new TokenStreamComponents(t, new PorterStemFilter(t));
- }
- };
-
assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
}
@@ -61,4 +61,9 @@ public class TestPorterStemFilter extend
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestSpanishAnalyzer extends
checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chicano");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new SpanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java Mon May 9 15:24:04 2011
@@ -45,4 +45,9 @@ public class TestSpanishLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestBasqueAnalyzer extends
checkOneTermReuse(a, "zaldiak", "zaldiak");
checkOneTermReuse(a, "mendiari", "mendi");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BasqueAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java Mon May 9 15:24:04 2011
@@ -219,4 +219,9 @@ public class TestPersianAnalyzer extends
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new PersianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestFinnishAnalyzer extends
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new FinnishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java Mon May 9 15:24:04 2011
@@ -45,4 +45,9 @@ public class TestFinnishLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Mon May 9 15:24:04 2011
@@ -260,4 +260,9 @@ public class TestFrenchAnalyzer extends
FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
assertAnalyzesTo(a, "Votre", new String[] { });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new FrenchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java Mon May 9 15:24:04 2011
@@ -159,4 +159,9 @@ public class TestFrenchLightStemFilter e
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java Mon May 9 15:24:04 2011
@@ -59,4 +59,9 @@ public class TestFrenchMinimalStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestGalicianAnalyzer extend
checkOneTermReuse(a, "correspondente", "correspondente");
checkOneTermReuse(a, "corresponderá", "correspond");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GalicianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java Mon May 9 15:24:04 2011
@@ -47,4 +47,9 @@ public class TestHindiAnalyzer extends B
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new HindiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestHungarianAnalyzer exten
checkOneTermReuse(a, "babakocsi", "babakocsi");
checkOneTermReuse(a, "babakocsijáért", "babakocs");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new HungarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestArmenianAnalyzer extend
checkOneTermReuse(a, "արծիվներ", "արծիվներ");
checkOneTermReuse(a, "արծիվ", "արծ");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ArmenianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestIndonesianAnalyzer exte
checkOneTermReuse(a, "peledakan", "peledakan");
checkOneTermReuse(a, "pembunuhan", "bunuh");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new IndonesianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestItalianAnalyzer extends
checkOneTermReuse(a, "abbandonata", "abbandonata");
checkOneTermReuse(a, "abbandonati", "abbandon");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ItalianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java Mon May 9 15:24:04 2011
@@ -45,4 +45,9 @@ public class TestItalianLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java Mon May 9 15:24:04 2011
@@ -51,7 +51,7 @@ public class TestLimitTokenCountAnalyzer
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(), 100000)));
+ TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(random), 100000)));
Document doc = new Document();
StringBuilder b = new StringBuilder();
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Mon May 9 15:24:04 2011
@@ -185,4 +185,9 @@ public class TestDutchStemmer extends Ba
checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected);
}
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new DutchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
+
}
\ No newline at end of file
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestNorwegianAnalyzer exten
checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new NorwegianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java Mon May 9 15:24:04 2011
@@ -127,4 +127,70 @@ public class TestPathHierarchyTokenizer
new int[]{1, 0, 0, 0},
path.length());
}
+
+ public void testBasicSkip() throws Exception {
+ String path = "/a/b/c";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c"},
+ new int[]{2, 2},
+ new int[]{4, 6},
+ new int[]{1, 0},
+ path.length());
+ }
+
+ public void testEndOfDelimiterSkip() throws Exception {
+ String path = "/a/b/c/";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c", "/b/c/"},
+ new int[]{2, 2, 2},
+ new int[]{4, 6, 7},
+ new int[]{1, 0, 0},
+ path.length());
+ }
+
+ public void testStartOfCharSkip() throws Exception {
+ String path = "a/b/c";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c"},
+ new int[]{1, 1},
+ new int[]{3, 5},
+ new int[]{1, 0},
+ path.length());
+ }
+
+ public void testStartOfCharEndOfDelimiterSkip() throws Exception {
+ String path = "a/b/c/";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/b", "/b/c", "/b/c/"},
+ new int[]{1, 1, 1},
+ new int[]{3, 5, 6},
+ new int[]{1, 0, 0},
+ path.length());
+ }
+
+ public void testOnlyDelimiterSkip() throws Exception {
+ String path = "/";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{},
+ new int[]{},
+ new int[]{},
+ new int[]{},
+ path.length());
+ }
+
+ public void testOnlyDelimitersSkip() throws Exception {
+ String path = "//";
+ PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
+ assertTokenStreamContents(t,
+ new String[]{"/"},
+ new int[]{1},
+ new int[]{2},
+ new int[]{1},
+ path.length());
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java Mon May 9 15:24:04 2011
@@ -50,4 +50,9 @@ public class TestPortugueseAnalyzer exte
checkOneTermReuse(a, "quilométricas", "quilométricas");
checkOneTermReuse(a, "quilométricos", "quilométr");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new PortugueseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java Mon May 9 15:24:04 2011
@@ -92,4 +92,9 @@ public class TestPortugueseLightStemFilt
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), "ptlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java Mon May 9 15:24:04 2011
@@ -66,4 +66,9 @@ public class TestPortugueseMinimalStemFi
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), "ptminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java Mon May 9 15:24:04 2011
@@ -66,4 +66,9 @@ public class TestPortugueseStemFilter ex
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}