You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/06/06 19:12:03 UTC
svn commit: r1346989 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/core/ lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
Author: mikemccand
Date: Wed Jun 6 17:12:02 2012
New Revision: 1346989
URL: http://svn.apache.org/viewvc?rev=1346989&view=rev
Log:
improve test coverage of Terms.intersect
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java?rev=1346989&r1=1346988&r2=1346989&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java Wed Jun 6 17:12:02 2012
@@ -31,13 +31,14 @@ import org.apache.lucene.search.FieldCac
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
+import org.apache.lucene.util.automaton.RegExp;
@SuppressCodecs({ "SimpleText", "Memory" })
public class TestTermsEnum extends LuceneTestCase {
@@ -184,8 +185,9 @@ public class TestTermsEnum extends Lucen
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
-
+
final int numTerms = atLeast(300);
+ //final int numTerms = 50;
final Set<String> terms = new HashSet<String>();
final Collection<String> pendingTerms = new ArrayList<String>();
@@ -259,6 +261,14 @@ public class TestTermsEnum extends Lucen
}
a = DaciukMihovAutomatonBuilder.build(sortedAcceptTerms);
}
+
+ if (random().nextBoolean()) {
+ if (VERBOSE) {
+ System.out.println("TEST: reduce the automaton");
+ }
+ a.reduce();
+ }
+
final CompiledAutomaton c = new CompiledAutomaton(a, true, false);
final BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.size()];
@@ -321,7 +331,7 @@ public class TestTermsEnum extends Lucen
final BytesRef expected = termsArray[loc];
final BytesRef actual = te.next();
if (VERBOSE) {
- System.out.println("TEST: next() expected=" + expected.utf8ToString() + " actual=" + actual.utf8ToString());
+ System.out.println("TEST: next() expected=" + expected.utf8ToString() + " actual=" + (actual == null ? "null" : actual.utf8ToString()));
}
assertEquals(expected, actual);
assertEquals(1, te.docFreq());
@@ -517,7 +527,7 @@ public class TestTermsEnum extends Lucen
}
private String getRandomString() {
- //return _TestUtil.randomSimpleString(random);
+ //return _TestUtil.randomSimpleString(random());
return _TestUtil.randomRealisticUnicodeString(random());
}
@@ -713,4 +723,55 @@ public class TestTermsEnum extends Lucen
}
}
}
+
+ public void testIntersectBasic() throws Exception { // Terms.intersect over three one-value documents ("aaa", "bbb", "ccc") with a ".*" automaton
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ iwc.setMergePolicy(new LogDocMergePolicy()); // NOTE(review): presumably keeps doc ids in insertion order for the nextDoc() checks below; confirm
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = new Document();
+ doc.add(newField("field", "aaa", TextField.TYPE_UNSTORED));
+ w.addDocument(doc);
+ // second document: value "bbb"; note it is added with StringField.TYPE_UNSTORED, unlike the other two
+ doc = new Document();
+ doc.add(newField("field", "bbb", StringField.TYPE_UNSTORED));
+ w.addDocument(doc);
+ // third document: value "ccc"
+ doc = new Document();
+ doc.add(newField("field", "ccc", TextField.TYPE_UNSTORED));
+ w.addDocument(doc);
+ // force-merge to one segment; the rest of the test reads only sub-reader 0
+ w.forceMerge(1);
+ DirectoryReader r = w.getReader();
+ w.close();
+ AtomicReader sub = r.getSequentialSubReaders()[0];
+ Terms terms = sub.fields().terms("field");
+ Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton(); // all three indexed terms are expected to be accepted
+ CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
+ TermsEnum te = terms.intersect(ca, null); // null start term: every term is expected, in sorted order
+ assertEquals("aaa", te.next().utf8ToString());
+ assertEquals(0, te.docs(null, null, false).nextDoc()); // each term is expected in exactly the doc that was added with it
+ assertEquals("bbb", te.next().utf8ToString());
+ assertEquals(1, te.docs(null, null, false).nextDoc());
+ assertEquals("ccc", te.next().utf8ToString());
+ assertEquals(2, te.docs(null, null, false).nextDoc());
+ assertNull(te.next()); // enumeration is exhausted after the third term
+ // start term "abc" is not in the index: enumeration is expected to begin at the next term, "bbb"
+ te = terms.intersect(ca, new BytesRef("abc"));
+ assertEquals("bbb", te.next().utf8ToString());
+ assertEquals(1, te.docs(null, null, false).nextDoc());
+ assertEquals("ccc", te.next().utf8ToString());
+ assertEquals(2, te.docs(null, null, false).nextDoc());
+ assertNull(te.next());
+ // start term "aaa" is in the index: it is expected to be excluded, so enumeration again begins at "bbb"
+ te = terms.intersect(ca, new BytesRef("aaa"));
+ assertEquals("bbb", te.next().utf8ToString());
+ assertEquals(1, te.docs(null, null, false).nextDoc());
+ assertEquals("ccc", te.next().utf8ToString());
+ assertEquals(2, te.docs(null, null, false).nextDoc());
+ assertNull(te.next());
+
+ r.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1346989&r1=1346988&r2=1346989&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Wed Jun 6 17:12:02 2012
@@ -54,10 +54,13 @@ import org.apache.lucene.store.MockDirec
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LineFileDocs;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
@@ -495,7 +498,6 @@ public class TestFSTs extends LuceneTest
FST<T> fst = builder.finish();
if (random.nextBoolean() && fst != null && !willRewrite) {
- TestFSTs t = new TestFSTs();
IOContext context = LuceneTestCase.newIOContext(random);
IndexOutput out = dir.createOutput("fst.bin", context);
fst.save(out);
@@ -1136,7 +1138,17 @@ public class TestFSTs extends LuceneTest
}
BytesRef term;
int ord = 0;
+
+ Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
+ final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);
+
while((term = termsEnum.next()) != null) {
+ BytesRef term2 = termsEnum2.next();
+ assertNotNull(term2);
+ assertEquals(term, term2);
+ assertEquals(termsEnum.docFreq(), termsEnum2.docFreq());
+ assertEquals(termsEnum.totalTermFreq(), termsEnum2.totalTermFreq());
+
if (ord == 0) {
try {
termsEnum.ord();