You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/14 15:51:59 UTC
svn commit: r1103112 [17/24] - in /lucene/dev/branches/flexscoring: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/db/bdb-je/
dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea/lucene/cont...
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestLockFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestLockFactory.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestLockFactory.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestLockFactory.java Sat May 14 13:51:35 2011
@@ -49,7 +49,7 @@ public class TestLockFactory extends Luc
// Lock prefix should have been set:
assertTrue("lock prefix was not set by the RAMDirectory", lf.lockPrefixSet);
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
// add 100 documents (so that commit lock is used)
for (int i = 0; i < 100; i++) {
@@ -81,13 +81,13 @@ public class TestLockFactory extends Luc
assertTrue("RAMDirectory.setLockFactory did not take",
NoLockFactory.class.isInstance(dir.getLockFactory()));
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
writer.commit(); // required so the second open succeed
// Create a 2nd IndexWriter. This is normally not allowed but it should run through since we're not
// using any locks:
IndexWriter writer2 = null;
try {
- writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
+ writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
} catch (Exception e) {
e.printStackTrace(System.out);
fail("Should not have hit an IOException with no locking");
@@ -107,12 +107,12 @@ public class TestLockFactory extends Luc
assertTrue("RAMDirectory did not use correct LockFactory: got " + dir.getLockFactory(),
SingleInstanceLockFactory.class.isInstance(dir.getLockFactory()));
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
// Create a 2nd IndexWriter. This should fail:
IndexWriter writer2 = null;
try {
- writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
+ writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
fail("Should have hit an IOException with two IndexWriters on default SingleInstanceLockFactory");
} catch (IOException e) {
}
@@ -148,7 +148,7 @@ public class TestLockFactory extends Luc
Directory dir = newFSDirectory(indexDir, lockFactory);
// First create a 1 doc index:
- IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
+ IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
addDoc(w);
w.close();
@@ -280,7 +280,7 @@ public class TestLockFactory extends Luc
IndexWriter writer = null;
for(int i=0;i<this.numIteration;i++) {
try {
- writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
+ writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
} catch (IOException e) {
if (e.toString().indexOf(" timed out:") == -1) {
hitException = true;
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java Sat May 14 13:51:35 2011
@@ -41,7 +41,7 @@ public class TestMultiMMap extends Lucen
@Override
public void setUp() throws Exception {
super.setUp();
- workDir = new File(TEMP_DIR, "TestMultiMMap");
+ workDir = _TestUtil.getTempDir("TestMultiMMap");
workDir.mkdirs();
}
@@ -59,7 +59,7 @@ public class TestMultiMMap extends Lucen
// we will map a lot, try to turn on the unmap hack
if (MMapDirectory.UNMAP_SUPPORTED)
dir.setUseUnmap(true);
- RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
Field docid = newField("docid", "0", Field.Store.YES, Field.Index.NOT_ANALYZED);
Field junk = newField("junk", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java Sat May 14 13:51:35 2011
@@ -24,6 +24,7 @@ import java.io.ObjectOutputStream;
import java.io.ByteArrayOutputStream;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -49,11 +50,11 @@ public class TestRAMDirectory extends Lu
@Override
public void setUp() throws Exception {
super.setUp();
- indexDir = new File(TEMP_DIR, "RAMDirIndex");
+ indexDir = _TestUtil.getTempDir("RAMDirIndex");
Directory dir = newFSDirectory(indexDir);
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
// add some documents
Document doc = null;
for (int i = 0; i < docsToAdd; i++) {
@@ -105,7 +106,7 @@ public class TestRAMDirectory extends Lu
dir.close();
final IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
writer.optimize();
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestWindowsMMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestWindowsMMap.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestWindowsMMap.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestWindowsMMap.java Sat May 14 13:51:35 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.store;
import java.io.File;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@@ -59,7 +60,7 @@ public class TestWindowsMMap extends Luc
}
private final static String storePathname =
- new File(TEMP_DIR,"testLuceneMmap").getAbsolutePath();
+ _TestUtil.getTempDir("testLuceneMmap").getAbsolutePath();
public void testMmapIndex() throws Exception {
// sometimes the directory is not cleaned by rmDir, because on Windows it
@@ -71,7 +72,7 @@ public class TestWindowsMMap extends Luc
// plan to add a set of useful stopwords, consider changing some of the
// interior filters.
- MockAnalyzer analyzer = new MockAnalyzer();
+ MockAnalyzer analyzer = new MockAnalyzer(random);
// TODO: something about lock timeouts and leftover locks.
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, analyzer)
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java Sat May 14 13:51:35 2011
@@ -144,6 +144,24 @@ public class TestArrayUtil extends Lucen
}
}
+ private Integer[] createSparseRandomArray(int maxSize) {
+ final Integer[] a = new Integer[random.nextInt(maxSize) + 1];
+ for (int i = 0; i < a.length; i++) {
+ a[i] = Integer.valueOf(random.nextInt(2));
+ }
+ return a;
+ }
+
+ // This is a test for LUCENE-3054; without the merge-sort fallback it fails with a stack overflow in most cases
+ public void testQuickToMergeSortFallback() {
+ for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
+ Integer[] a1 = createSparseRandomArray(40000), a2 = a1.clone();
+ ArrayUtil.quickSort(a1);
+ Arrays.sort(a2);
+ assertArrayEquals(a2, a1);
+ }
+ }
+
public void testMergeSort() {
for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java Sat May 14 13:51:35 2011
@@ -147,4 +147,14 @@ public class TestAttributeSource extends
fail("Should throw IllegalArgumentException");
} catch (IllegalArgumentException iae) {}
}
+
+ public void testLUCENE_3042() throws Exception {
+ final AttributeSource src1 = new AttributeSource();
+ src1.addAttribute(CharTermAttribute.class).append("foo");
+ int hash1 = src1.hashCode(); // this triggers a cached state
+ final AttributeSource src2 = new AttributeSource(src1);
+ src2.addAttribute(TypeAttribute.class).setType("bar");
+ assertTrue("The hashCode is identical, so the captured state was preserved.", hash1 != src1.hashCode());
+ assertEquals(src2.hashCode(), src1.hashCode());
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java Sat May 14 13:51:35 2011
@@ -135,6 +135,7 @@ public class TestBytesRefHash extends Lu
public void testCompact() {
BytesRef ref = new BytesRef();
for (int j = 0; j < 2 * RANDOM_MULTIPLIER; j++) {
+ int numEntries = 0;
final int size = 797;
BitSet bits = new BitSet(size);
for (int i = 0; i < size; i++) {
@@ -143,13 +144,21 @@ public class TestBytesRefHash extends Lu
str = _TestUtil.randomRealisticUnicodeString(random, 1000);
} while (str.length() == 0);
ref.copy(str);
- bits.set(hash.add(ref));
-
+ final int key = hash.add(ref);
+ if (key < 0) {
+ assertTrue(bits.get((-key)-1));
+ } else {
+ assertFalse(bits.get(key));
+ bits.set(key);
+ numEntries++;
+ }
}
assertEquals(hash.size(), bits.cardinality());
+ assertEquals(numEntries, bits.cardinality());
+ assertEquals(numEntries, hash.size());
int[] compact = hash.compact();
- assertTrue(size < compact.length);
- for (int i = 0; i < size; i++) {
+ assertTrue(numEntries < compact.length);
+ for (int i = 0; i < numEntries; i++) {
bits.set(compact[i], false);
}
assertEquals(0, bits.cardinality());
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java Sat May 14 13:51:35 2011
@@ -43,8 +43,8 @@ public class TestFieldCacheSanityChecker
dirA = newDirectory();
dirB = newDirectory();
- IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
- IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java Sat May 14 13:51:35 2011
@@ -39,6 +39,11 @@ public class TestLevenshteinAutomata ext
assertCharVectors(2);
}
+ // LUCENE-3094
+ public void testNoWastedStates() throws Exception {
+ AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc").toAutomaton(1));
+ }
+
/**
* Tests all possible characteristic vectors for some n
* This exhaustively tests the parametric transitions tables.
@@ -66,6 +71,7 @@ public class TestLevenshteinAutomata ext
assertNotNull(automata[n]);
assertTrue(automata[n].isDeterministic());
assertTrue(SpecialOperations.isFinite(automata[n]));
+ AutomatonTestUtil.assertNoDetachedStates(automata[n]);
// check that the dfa for n-1 accepts a subset of the dfa for n
if (n > 0) {
assertTrue(automata[n-1].subsetOf(automata[n]));
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Sat May 14 13:51:35 2011
@@ -288,6 +288,36 @@ public class TestFSTs extends LuceneTest
}
new FSTTester<IntsRef>(random, dir, inputMode, pairs, outputs).doTest();
}
+
+ // Up to two positive ints, shared, generally but not
+ // monotonically increasing
+ {
+ if (VERBOSE) {
+ System.out.println("TEST: now test UpToTwoPositiveIntOutputs");
+ }
+ final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true);
+ final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
+ long lastOutput = 0;
+ for(int idx=0;idx<terms.length;idx++) {
+ // Sometimes go backwards
+ long value = lastOutput + _TestUtil.nextInt(random, -100, 1000);
+ while(value < 0) {
+ value = lastOutput + _TestUtil.nextInt(random, -100, 1000);
+ }
+ final Object output;
+ if (random.nextInt(5) == 3) {
+ long value2 = lastOutput + _TestUtil.nextInt(random, -100, 1000);
+ while(value2 < 0) {
+ value2 = lastOutput + _TestUtil.nextInt(random, -100, 1000);
+ }
+ output = outputs.get(value, value2);
+ } else {
+ output = outputs.get(value);
+ }
+ pairs.add(new FSTTester.InputOutput<Object>(terms[idx], output));
+ }
+ new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest();
+ }
}
private static class FSTTester<T> {
@@ -328,11 +358,13 @@ public class TestFSTs extends LuceneTest
// no pruning
doTest(0, 0);
- // simple pruning
- doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0);
-
- // leafy pruning
- doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
+ if (!(outputs instanceof UpToTwoPositiveIntOutputs)) {
+ // simple pruning
+ doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0);
+
+ // leafy pruning
+ doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
+ }
}
// runs the term, returning the output, or null if term
@@ -421,7 +453,15 @@ public class TestFSTs extends LuceneTest
prune1==0 && prune2==0, outputs);
for(InputOutput<T> pair : pairs) {
- builder.add(pair.input, pair.output);
+ if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
+ final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
+ final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
+ @SuppressWarnings("unchecked") final Builder<Object> builderObject = (Builder<Object>) builder;
+ builderObject.add(pair.input, _outputs.get(twoLongs.first));
+ builderObject.add(pair.input, _outputs.get(twoLongs.second));
+ } else {
+ builder.add(pair.input, pair.output);
+ }
}
FST<T> fst = builder.finish();
@@ -942,7 +982,7 @@ public class TestFSTs extends LuceneTest
final LineFileDocs docs = new LineFileDocs(random);
final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 100 : 1;
- final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
+ final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = _TestUtil.getTempDir("fstlines");
final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir));
final IndexWriter writer = new IndexWriter(dir, conf);
@@ -1421,4 +1461,73 @@ public class TestFSTs extends LuceneTest
FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<Object>());
s.verifyStateAndBelow(fst, arc, 1);
}
+
+ // Make sure raw FST can differentiate between final vs
+ // non-final end nodes
+ public void testNonFinalStopNodes() throws Exception {
+ final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ final Long nothing = outputs.getNoOutput();
+ final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+
+ final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs);
+
+ final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
+
+ // Add final stop node
+ {
+ final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
+ node.isFinal = true;
+ rootNode.addArc('a', node);
+ final Builder.CompiledNode frozen = new Builder.CompiledNode();
+ frozen.address = fst.addNode(node);
+ rootNode.arcs[0].nextFinalOutput = outputs.get(17);
+ rootNode.arcs[0].isFinal = true;
+ rootNode.arcs[0].output = nothing;
+ rootNode.arcs[0].target = frozen;
+ }
+
+ // Add non-final stop node
+ {
+ final Builder.UnCompiledNode<Long> node = new Builder.UnCompiledNode<Long>(b, 0);
+ rootNode.addArc('b', node);
+ final Builder.CompiledNode frozen = new Builder.CompiledNode();
+ frozen.address = fst.addNode(node);
+ rootNode.arcs[1].nextFinalOutput = nothing;
+ rootNode.arcs[1].output = outputs.get(42);
+ rootNode.arcs[1].target = frozen;
+ }
+
+ fst.finish(fst.addNode(rootNode));
+
+ checkStopNodes(fst, outputs);
+
+ // Make sure it still works after save/load:
+ Directory dir = newDirectory();
+ IndexOutput out = dir.createOutput("fst");
+ fst.save(out);
+ out.close();
+
+ IndexInput in = dir.openInput("fst");
+ final FST<Long> fst2 = new FST<Long>(in, outputs);
+ checkStopNodes(fst2, outputs);
+ in.close();
+ dir.close();
+ }
+
+ private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
+ final Long nothing = outputs.getNoOutput();
+ FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>());
+ assertEquals(nothing, startArc.output);
+ assertEquals(nothing, startArc.nextFinalOutput);
+
+ FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>());
+ assertEquals('a', arc.label);
+ assertEquals(17, arc.nextFinalOutput.longValue());
+ assertTrue(arc.isFinal());
+
+ arc = fst.readNextArc(arc);
+ assertEquals('b', arc.label);
+ assertFalse(arc.isFinal());
+ assertEquals(42, arc.output.longValue());
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java Sat May 14 13:51:35 2011
@@ -33,7 +33,8 @@ public enum LicenseType {
MPL("Mozilla Public License", false), //NOT SURE on the required notice
PD("Public Domain", false),
//SUNBCLA("Sun Binary Code License Agreement"),
- SUN("Sun Open Source License", false)
+ SUN("Sun Open Source License", false),
+ FAKE("FAKE license - not needed", false)
;
private String display;
Modified: lucene/dev/branches/flexscoring/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/CHANGES.txt?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/CHANGES.txt Sat May 14 13:51:35 2011
@@ -83,6 +83,8 @@ New Features
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+ - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
+ - o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils
* SOLR-1057: Add PathHierarchyTokenizer that represents file path hierarchies as synonyms of
/something, /something/something, /something/something/else. (Ryan McKinley, Koji Sekiguchi)
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java Sat May 14 13:51:35 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ar;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // javadoc @link
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java Sat May 14 13:51:35 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java Sat May 14 13:51:35 2011
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java Sat May 14 13:51:35 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java Sat May 14 13:51:35 2011
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java Sat May 14 13:51:35 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java Sat May 14 13:51:35 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java Sat May 14 13:51:35 2011
@@ -132,7 +132,8 @@ public class GermanStemmer
strip( buffer );
}
// Additional step for irregular plural nouns like "Matrizen -> Matrix".
- if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
+ // NOTE: this length constraint is probably not a great value, its just to prevent AIOOBE on empty terms
+ if ( buffer.length() > 0 && buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
buffer.setCharAt( buffer.length() - 1, 'x' );
}
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java Sat May 14 13:51:35 2011
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java Sat May 14 13:51:35 2011
@@ -31,8 +31,6 @@ import org.apache.lucene.util.Version;
/**
* Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be
* tokenized as "avion" (plane).
- * <p>
- * Note that {@link StandardTokenizer} sees " ' " as a space, and cuts it out.
*
* @see <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>
*/
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java Sat May 14 13:51:35 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.in;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java Sat May 14 13:51:35 2011
@@ -19,11 +19,13 @@ package org.apache.lucene.analysis.it;
import java.io.IOException;
import java.io.Reader;
+import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -38,6 +40,14 @@ import org.tartarus.snowball.ext.Italian
/**
* {@link Analyzer} for Italian.
+ * <p>
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating ItalianAnalyzer:
+ * <ul>
+ * <li> As of 3.2, ElisionFilter with a set of Italian
+ * contractions is used by default.
+ * </ul>
*/
public final class ItalianAnalyzer extends StopwordAnalyzerBase {
private final Set<?> stemExclusionSet;
@@ -45,6 +55,13 @@ public final class ItalianAnalyzer exten
/** File containing default Italian stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
+ private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
+ new CharArraySet(Version.LUCENE_CURRENT,
+ Arrays.asList(
+ "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell",
+ "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
+ ), true));
+
/**
* Returns an unmodifiable instance of the default stop words set.
* @return default stop words set.
@@ -112,7 +129,7 @@ public final class ItalianAnalyzer exten
* @return A
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
- * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+ * {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is
* provided and {@link SnowballFilter}.
*/
@@ -121,6 +138,9 @@ public final class ItalianAnalyzer exten
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);
+ if (matchVersion.onOrAfter(Version.LUCENE_32)) {
+ result = new ElisionFilter(matchVersion, result, DEFAULT_ARTICLES);
+ }
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java Sat May 14 13:51:35 2011
@@ -25,57 +25,71 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
- *
+ *
* Take something like:
- *
+ *
* <pre>
- * /soemthing/something/else
+ * /something/something/else
* </pre>
- *
+ *
* and make:
- *
+ *
* <pre>
- * /soemthing
- * /soemthing/something
- * /soemthing/something/else
+ * /something
+ * /something/something
+ * /something/something/else
* </pre>
- *
*/
public class PathHierarchyTokenizer extends Tokenizer {
public PathHierarchyTokenizer(Reader input) {
- this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER);
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
}
public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
- this(input, bufferSize, delimiter, delimiter);
+ this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
}
public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
- this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement);
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
}
- public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
+ public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
super(input);
termAtt.resizeBuffer(bufferSize);
+
this.delimiter = delimiter;
this.replacement = replacement;
- endDelimiter = false;
+ this.skip = skip;
resultToken = new StringBuilder(bufferSize);
}
-
+
private static final int DEFAULT_BUFFER_SIZE = 1024;
public static final char DEFAULT_DELIMITER = '/';
+ public static final int DEFAULT_SKIP = 0;
+
private final char delimiter;
private final char replacement;
-
+ private final int skip;
+
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
+ private int startPosition = 0;
private int finalOffset = 0;
- private boolean endDelimiter;
+ private int skipped = 0;
+ private boolean endDelimiter = false;
private StringBuilder resultToken;
+
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
@@ -97,43 +111,69 @@ public class PathHierarchyTokenizer exte
while (true) {
int c = input.read();
- if( c < 0 ) {
- length += resultToken.length();
- termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
- if( added ){
- resultToken.setLength(0);
- resultToken.append(termAtt.buffer(), 0, length);
- }
- return added;
- }
- added = true;
- if( c == delimiter ) {
- if( length > 0 ){
- endDelimiter = true;
- break;
+ if( c < 0 ){
+ if( skipped > skip ) {
+ length += resultToken.length();
+ termAtt.setLength(length);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
+ if( added ){
+ resultToken.setLength(0);
+ resultToken.append(termAtt.buffer(), 0, length);
+ }
+ return added;
}
else{
- termAtt.append(replacement);
+ finalOffset = correctOffset(startPosition + length);
+ return false;
+ }
+ }
+ if( !added ){
+ added = true;
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(c == delimiter ? replacement : (char)c);
length++;
}
+ else {
+ startPosition++;
+ }
}
else {
- termAtt.append((char)c);
- length++;
+ if( c == delimiter ){
+ if( skipped > skip ){
+ endDelimiter = true;
+ break;
+ }
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(replacement);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
+ else {
+ if( skipped > skip ){
+ termAtt.append((char)c);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
}
}
-
length += resultToken.length();
termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
resultToken.setLength(0);
resultToken.append(termAtt.buffer(), 0, length);
return true;
}
-
+
@Override
public final void end() {
// set final offset
@@ -146,5 +186,6 @@ public class PathHierarchyTokenizer exte
resultToken.setLength(0);
finalOffset = 0;
endDelimiter = false;
+ skipped = 0;
}
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java Sat May 14 13:51:35 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ru;
*/
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer; // for javadocs
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.util.AttributeSource;
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java Sat May 14 13:51:35 2011
@@ -131,5 +131,8 @@ public final class ThaiWordFilter extend
public void reset() throws IOException {
super.reset();
hasMoreTokensInClone = false;
+ clonedToken = null;
+ clonedTermAtt = null;
+ clonedOffsetAtt = null;
}
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java Sat May 14 13:51:35 2011
@@ -24,7 +24,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java Sat May 14 13:51:35 2011
@@ -98,4 +98,9 @@ public class TestArabicAnalyzer extends
assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ArabicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java Sat May 14 13:51:35 2011
@@ -75,4 +75,9 @@ public class TestBulgarianAnalyzer exten
Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BulgarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Sat May 14 13:51:35 2011
@@ -157,4 +157,8 @@ public class TestBrazilianStemmer extend
checkOneTermReuse(a, input, expected);
}
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BrazilianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
\ No newline at end of file
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java Sat May 14 13:51:35 2011
@@ -50,4 +50,9 @@ public class TestCatalanAnalyzer extends
checkOneTermReuse(a, "llengües", "llengües");
checkOneTermReuse(a, "llengua", "llengu");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CatalanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java Sat May 14 13:51:35 2011
@@ -270,4 +270,9 @@ public class TestCJKTokenizer extends Ba
newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE),
newToken("あい", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Sat May 14 13:51:35 2011
@@ -210,6 +210,13 @@ public class TestAnalyzers extends BaseT
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
"\ud801\udc1ctest" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
final class PayloadSetter extends TokenFilter {
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java Sat May 14 13:51:35 2011
@@ -309,4 +309,9 @@ public class TestClassicAnalyzer extends
dir.close();
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java Sat May 14 13:51:35 2011
@@ -102,4 +102,9 @@ public class TestKeywordAnalyzer extends
assertEquals(0, offsetAtt.startOffset());
assertEquals(4, offsetAtt.endOffset());
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new KeywordAnalyzer(), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Sat May 14 13:51:35 2011
@@ -3,6 +3,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
@@ -219,4 +220,9 @@ public class TestStandardAnalyzer extend
new String[] { "仮", "名", "遣", "い", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Sat May 14 13:51:35 2011
@@ -418,4 +418,9 @@ public class TestUAX29URLEmailTokenizer
new String[] { "仮", "名", "遣", "い", "カタカナ" },
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java Sat May 14 13:51:35 2011
@@ -67,4 +67,9 @@ public class TestCzechAnalyzer extends B
CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"});
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new CzechAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java Sat May 14 13:51:35 2011
@@ -50,4 +50,9 @@ public class TestDanishAnalyzer extends
checkOneTermReuse(a, "undersøgelse", "undersøgelse");
checkOneTermReuse(a, "undersøg", "undersøg");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new DanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java Sat May 14 13:51:35 2011
@@ -63,4 +63,9 @@ public class TestGermanAnalyzer extends
checkOneTermReuse(a, "Schaltflächen", "schaltflach");
checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GermanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java Sat May 14 13:51:35 2011
@@ -45,4 +45,9 @@ public class TestGermanLightStemFilter e
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java Sat May 14 13:51:35 2011
@@ -57,4 +57,9 @@ public class TestGermanMinimalStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java Sat May 14 13:51:35 2011
@@ -36,20 +36,30 @@ import static org.apache.lucene.analysis
*
*/
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t,
+ new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
+ }
+ };
- public void testStemming() throws Exception {
- Analyzer analyzer = new ReusableAnalyzerBase() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer t = new KeywordTokenizer(reader);
- return new TokenStreamComponents(t,
- new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
- }
- };
-
+ public void testStemming() throws Exception {
InputStream vocOut = getClass().getResourceAsStream("data.txt");
assertVocabulary(analyzer, vocOut);
vocOut.close();
}
+
+ // LUCENE-3043: we use keywordtokenizer in this test,
+ // so ensure the stemmer does not crash on zero-length strings.
+ public void testEmpty() throws Exception {
+ assertAnalyzesTo(analyzer, "", new String[] { "" });
+ }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java Sat May 14 13:51:35 2011
@@ -87,4 +87,9 @@ public class GreekAnalyzerTest extends B
assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι",
new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
}
- }
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GreekAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
+}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java Sat May 14 13:51:35 2011
@@ -52,4 +52,9 @@ public class TestEnglishAnalyzer extends
checkOneTermReuse(a, "books", "books");
checkOneTermReuse(a, "book", "book");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new EnglishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java Sat May 14 13:51:35 2011
@@ -51,4 +51,9 @@ public class TestEnglishMinimalStemFilte
checkOneTerm(analyzer, "congress", "congress");
checkOneTerm(analyzer, "serious", "serious");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java Sat May 14 13:51:35 2011
@@ -36,21 +36,21 @@ import static org.apache.lucene.analysis
/**
* Test the PorterStemFilter with Martin Porter's test data.
*/
-public class TestPorterStemFilter extends BaseTokenStreamTestCase {
+public class TestPorterStemFilter extends BaseTokenStreamTestCase {
+ Analyzer a = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ Tokenizer t = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(t, new PorterStemFilter(t));
+ }
+ };
+
/**
* Run the stemmer against all strings in voc.txt
* The output should be the same as the string in output.txt
*/
public void testPorterStemFilter() throws Exception {
- Analyzer a = new ReusableAnalyzerBase() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- Tokenizer t = new KeywordTokenizer(reader);
- return new TokenStreamComponents(t, new PorterStemFilter(t));
- }
- };
-
assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
}
@@ -61,4 +61,9 @@ public class TestPorterStemFilter extend
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, a, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java Sat May 14 13:51:35 2011
@@ -50,4 +50,9 @@ public class TestSpanishAnalyzer extends
checkOneTermReuse(a, "chicana", "chican");
checkOneTermReuse(a, "chicano", "chicano");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new SpanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java Sat May 14 13:51:35 2011
@@ -45,4 +45,9 @@ public class TestSpanishLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java Sat May 14 13:51:35 2011
@@ -50,4 +50,9 @@ public class TestBasqueAnalyzer extends
checkOneTermReuse(a, "zaldiak", "zaldiak");
checkOneTermReuse(a, "mendiari", "mendi");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new BasqueAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java Sat May 14 13:51:35 2011
@@ -219,4 +219,9 @@ public class TestPersianAnalyzer extends
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new PersianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java Sat May 14 13:51:35 2011
@@ -50,4 +50,9 @@ public class TestFinnishAnalyzer extends
checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new FinnishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java Sat May 14 13:51:35 2011
@@ -45,4 +45,9 @@ public class TestFinnishLightStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Sat May 14 13:51:35 2011
@@ -260,4 +260,9 @@ public class TestFrenchAnalyzer extends
FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
assertAnalyzesTo(a, "Votre", new String[] { });
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new FrenchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java Sat May 14 13:51:35 2011
@@ -159,4 +159,9 @@ public class TestFrenchLightStemFilter e
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java Sat May 14 13:51:35 2011
@@ -59,4 +59,9 @@ public class TestFrenchMinimalStemFilter
public void testVocabulary() throws IOException {
assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java Sat May 14 13:51:35 2011
@@ -50,4 +50,9 @@ public class TestGalicianAnalyzer extend
checkOneTermReuse(a, "correspondente", "correspondente");
checkOneTermReuse(a, "corresponderá", "correspond");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new GalicianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java Sat May 14 13:51:35 2011
@@ -47,4 +47,9 @@ public class TestHindiAnalyzer extends B
HindiAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTermReuse(a, "हिंदी", "हिंदी");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new HindiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java Sat May 14 13:51:35 2011
@@ -50,4 +50,9 @@ public class TestHungarianAnalyzer exten
checkOneTermReuse(a, "babakocsi", "babakocsi");
checkOneTermReuse(a, "babakocsijáért", "babakocs");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new HungarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java?rev=1103112&r1=1103111&r2=1103112&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java Sat May 14 13:51:35 2011
@@ -50,4 +50,9 @@ public class TestArmenianAnalyzer extend
checkOneTermReuse(a, "արծիվներ", "արծիվներ");
checkOneTermReuse(a, "արծիվ", "արծ");
}
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ checkRandomData(random, new ArmenianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
+ }
}