You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by bu...@apache.org on 2010/07/22 21:34:52 UTC
svn commit: r966819 [3/20] - in /lucene/dev/branches/realtime_search: ./
lucene/ lucene/backwards/ lucene/contrib/ lucene/contrib/benchmark/conf/
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/
lucene/contrib/benchmark/src/j...
Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java Thu Jul 22 19:34:35 2010
@@ -23,8 +23,8 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
@@ -84,11 +84,11 @@ public class TestEmptyIndex extends Luce
}
}
- public void testTermEnum() throws Exception {
+ public void testTermsEnum() throws Exception {
InstantiatedIndex ii = new InstantiatedIndex();
IndexReader r = new InstantiatedIndexReader(ii);
- termEnumTest(r);
+ termsEnumTest(r);
r.close();
ii.close();
@@ -97,17 +97,13 @@ public class TestEmptyIndex extends Luce
Directory d = new RAMDirectory();
new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())).close();
r = IndexReader.open(d, false);
- termEnumTest(r);
+ termsEnumTest(r);
r.close();
d.close();
}
- public void termEnumTest(IndexReader r) throws Exception {
- TermEnum terms = r.terms();
-
- assertNull(terms.term());
- assertFalse(terms.next());
-
+ public void termsEnumTest(IndexReader r) throws Exception {
+ assertNull(MultiFields.getFields(r));
}
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Thu Jul 22 19:34:35 2010
@@ -33,15 +33,18 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
-import org.apache.lucene.index.TermPositions;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
/**
* Asserts equality of content and behaviour of two index readers.
@@ -120,131 +123,69 @@ public class TestIndicesEquals extends L
// test seek
Term t = new Term("c", "danny");
- TermEnum aprioriTermEnum = aprioriReader.terms(t);
- TermEnum testTermEnum = testReader.terms(t);
-
+ TermsEnum aprioriTermEnum = MultiFields.getTerms(aprioriReader, t.field()).iterator();
+ aprioriTermEnum.seek(new BytesRef(t.text()));
+ TermsEnum testTermEnum = MultiFields.getTerms(testReader, t.field()).iterator();
+ testTermEnum.seek(new BytesRef(t.text()));
assertEquals(aprioriTermEnum.term(), testTermEnum.term());
- t = aprioriTermEnum.term();
-
- aprioriTermEnum.close();
- testTermEnum.close();
+ DocsEnum aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), null);
+ DocsEnum testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), null);
- TermDocs aprioriTermDocs = aprioriReader.termDocs(t);
- TermDocs testTermDocs = testReader.termDocs(t);
-
- assertEquals(aprioriTermDocs.next(), testTermDocs.next());
+ assertEquals(aprioriTermDocs.nextDoc(), testTermDocs.nextDoc());
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- if (aprioriTermDocs.skipTo(4)) {
- assertTrue(testTermDocs.skipTo(4));
+ if (aprioriTermDocs.advance(4) != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.advance(4) != DocsEnum.NO_MORE_DOCS);
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
} else {
- assertFalse(testTermDocs.skipTo(4));
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.advance(4));
}
- if (aprioriTermDocs.next()) {
- assertTrue(testTermDocs.next());
+ if (aprioriTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
} else {
- assertFalse(testTermDocs.next());
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc());
}
// beyond this point all next and skipto will return false
- if (aprioriTermDocs.skipTo(100)) {
- assertTrue(testTermDocs.skipTo(100));
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- } else {
- assertFalse(testTermDocs.skipTo(100));
- }
-
-
- if (aprioriTermDocs.next()) {
- assertTrue(testTermDocs.next());
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- } else {
- assertFalse(testTermDocs.next());
- }
-
- if (aprioriTermDocs.skipTo(110)) {
- assertTrue(testTermDocs.skipTo(110));
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- } else {
- assertFalse(testTermDocs.skipTo(110));
- }
-
- if (aprioriTermDocs.skipTo(10)) {
- assertTrue(testTermDocs.skipTo(10));
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- } else {
- assertFalse(testTermDocs.skipTo(10));
- }
-
-
- if (aprioriTermDocs.skipTo(210)) {
- assertTrue(testTermDocs.skipTo(210));
+ if (aprioriTermDocs.advance(100) != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.advance(100) != DocsEnum.NO_MORE_DOCS);
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
} else {
- assertFalse(testTermDocs.skipTo(210));
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.advance(100));
}
- aprioriTermDocs.close();
- testTermDocs.close();
-
-
-
- // test seek null (AllTermDocs)
- aprioriTermDocs = aprioriReader.termDocs(null);
- testTermDocs = testReader.termDocs(null);
-
- while (aprioriTermDocs.next()) {
- assertTrue(testTermDocs.next());
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- }
- assertFalse(testTermDocs.next());
-
-
- aprioriTermDocs.close();
- testTermDocs.close();
-
-
- // test seek default
- aprioriTermDocs = aprioriReader.termDocs();
- testTermDocs = testReader.termDocs();
-
- // this is invalid use of the API,
- // but if the response differs then it's an indication that something might have changed.
- // in 2.9 and 3.0 the two TermDocs-implementations returned different values at this point.
-// assertEquals("Descripency during invalid use of the TermDocs API, see comments in test code for details.",
-// aprioriTermDocs.next(), testTermDocs.next());
-
// start using the API the way one is supposed to use it
t = new Term("", "");
- aprioriTermDocs.seek(t);
- testTermDocs.seek(t);
+ FieldsEnum apFieldsEnum = MultiFields.getFields(aprioriReader).iterator();
+ String apFirstField = apFieldsEnum.next();
+
+ FieldsEnum testFieldsEnum = MultiFields.getFields(testReader).iterator();
+ String testFirstField = testFieldsEnum.next();
+ assertEquals(apFirstField, testFirstField);
+
+ aprioriTermEnum = apFieldsEnum.terms();
+ testTermEnum = testFieldsEnum.terms();
+
+ assertEquals(aprioriTermEnum.next(), testTermEnum.next());
+
+ aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), aprioriTermDocs);
+ testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), testTermDocs);
- while (aprioriTermDocs.next()) {
- assertTrue(testTermDocs.next());
+ while (aprioriTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
}
- assertFalse(testTermDocs.next());
-
- aprioriTermDocs.close();
- testTermDocs.close();
-
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc());
// clean up
aprioriReader.close();
@@ -443,98 +384,85 @@ public class TestIndicesEquals extends L
// compare term enumeration stepping
- TermEnum aprioriTermEnum = aprioriReader.terms();
- TermEnum testTermEnum = testReader.terms();
+ FieldsEnum aprioriFieldsEnum = MultiFields.getFields(aprioriReader).iterator();
+ FieldsEnum testFieldsEnum = MultiFields.getFields(testReader).iterator();
+ String aprioriField;
+ while((aprioriField = aprioriFieldsEnum.next()) != null) {
+ String testField = testFieldsEnum.next();
+ assertEquals(aprioriField, testField);
- while (true) {
-
- if (!aprioriTermEnum.next()) {
- assertFalse(testTermEnum.next());
- break;
- }
- assertTrue(testTermEnum.next());
+ TermsEnum aprioriTermEnum = aprioriFieldsEnum.terms();
+ TermsEnum testTermEnum = testFieldsEnum.terms();
- assertEquals(aprioriTermEnum.term(), testTermEnum.term());
- assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());
+ BytesRef aprioriText;
+ while((aprioriText = aprioriTermEnum.next()) != null) {
+ assertEquals(aprioriText, testTermEnum.next());
- // compare termDocs seeking
+ assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());
- TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
- TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());
+ // compare termDocs seeking
- while (aprioriTermDocsSeeker.next()) {
- assertTrue(testTermDocsSeeker.skipTo(aprioriTermDocsSeeker.doc()));
- assertEquals(aprioriTermDocsSeeker.doc(), testTermDocsSeeker.doc());
- }
-
- aprioriTermDocsSeeker.close();
- testTermDocsSeeker.close();
-
- // compare documents per term
-
- assertEquals(aprioriReader.docFreq(aprioriTermEnum.term()), testReader.docFreq(testTermEnum.term()));
-
- TermDocs aprioriTermDocs = aprioriReader.termDocs(aprioriTermEnum.term());
- TermDocs testTermDocs = testReader.termDocs(testTermEnum.term());
-
- while (true) {
- if (!aprioriTermDocs.next()) {
- assertFalse(testTermDocs.next());
- break;
+ DocsEnum aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), null);
+ DocsEnum testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), null);
+
+ while (aprioriTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.advance(aprioriTermDocs.docID()) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
}
- assertTrue(testTermDocs.next());
-
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- }
+
+ // compare documents per term
+
+ assertEquals(aprioriReader.docFreq(aprioriField, aprioriTermEnum.term()), testReader.docFreq(aprioriField, testTermEnum.term()));
- aprioriTermDocs.close();
- testTermDocs.close();
+ aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), aprioriTermDocs);
+ testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), testTermDocs);
- // compare term positions
+ while (true) {
+ if (aprioriTermDocs.nextDoc() == DocsEnum.NO_MORE_DOCS) {
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc());
+ break;
+ }
+ assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
- TermPositions testTermPositions = testReader.termPositions(testTermEnum.term());
- TermPositions aprioriTermPositions = aprioriReader.termPositions(aprioriTermEnum.term());
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
+ assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
+ }
- if (aprioriTermPositions != null) {
+ // compare term positions
- for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) {
- boolean hasNext = aprioriTermPositions.next();
- if (hasNext) {
- assertTrue(testTermPositions.next());
+ DocsAndPositionsEnum aprioriTermPositions = aprioriTermEnum.docsAndPositions(MultiFields.getDeletedDocs(aprioriReader), null);
+ DocsAndPositionsEnum testTermPositions = testTermEnum.docsAndPositions(MultiFields.getDeletedDocs(testReader), null);
- assertEquals(aprioriTermPositions.freq(), testTermPositions.freq());
+ if (aprioriTermPositions != null) {
+ for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) {
+ boolean hasNext = aprioriTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS;
+ if (hasNext) {
+ assertTrue(testTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS);
+
+ assertEquals(aprioriTermPositions.freq(), testTermPositions.freq());
- for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) {
- int aprioriPos = aprioriTermPositions.nextPosition();
- int testPos = testTermPositions.nextPosition();
+ for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) {
+ int aprioriPos = aprioriTermPositions.nextPosition();
+ int testPos = testTermPositions.nextPosition();
- if (aprioriPos != testPos) {
assertEquals(aprioriPos, testPos);
- }
-
- assertEquals(aprioriTermPositions.isPayloadAvailable(), testTermPositions.isPayloadAvailable());
- if (aprioriTermPositions.isPayloadAvailable()) {
- assertEquals(aprioriTermPositions.getPayloadLength(), testTermPositions.getPayloadLength());
- byte[] aprioriPayloads = aprioriTermPositions.getPayload(new byte[aprioriTermPositions.getPayloadLength()], 0);
- byte[] testPayloads = testTermPositions.getPayload(new byte[testTermPositions.getPayloadLength()], 0);
- for (int i = 0; i < aprioriPayloads.length; i++) {
- assertEquals(aprioriPayloads[i], testPayloads[i]);
+ assertEquals(aprioriTermPositions.hasPayload(), testTermPositions.hasPayload());
+ if (aprioriTermPositions.hasPayload()) {
+ BytesRef apPayload = aprioriTermPositions.getPayload();
+ BytesRef testPayload = testTermPositions.getPayload();
+ assertEquals(apPayload, testPayload);
}
}
-
}
}
}
-
- aprioriTermPositions.close();
- testTermPositions.close();
-
}
+ assertNull(testTermEnum.next());
}
+ assertNull(testFieldsEnum.next());
// compare term vectors and position vectors
@@ -589,12 +517,8 @@ public class TestIndicesEquals extends L
}
}
-
}
- aprioriTermEnum.close();
- testTermEnum.close();
-
aprioriReader.close();
testReader.close();
}
Propchange: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1,4 +1,5 @@
-/lucene/dev/branches/branch_3x/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:943137,949730
+/lucene/dev/branches/branch_3x/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:943137,949730,957490,960490,961612
+/lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:953476-966816
/lucene/java/branches/flex_1458/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:824912-931101
/lucene/java/branches/lucene_2_9/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:909334,948516
/lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:924483-924731,924781,925176-925462
Modified: lucene/dev/branches/realtime_search/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java Thu Jul 22 19:34:35 2010
@@ -43,8 +43,10 @@ import org.apache.lucene.document.Fielda
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
@@ -58,6 +60,7 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
/**
* Various methods that interact with Lucene and provide info about the
@@ -342,13 +345,21 @@ class LuceneMethods {
public void terms(String field) throws IOException {
TreeMap<String,Integer> termMap = new TreeMap<String,Integer>();
IndexReader indexReader = IndexReader.open(indexName, true);
- TermEnum terms = indexReader.terms();
- while (terms.next()) {
- Term term = terms.term();
- //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
- //if we're either not looking by field or we're matching the specific field
- if ((field == null) || field.equals(term.field()))
- termMap.put(term.field() + ":" + term.text(), Integer.valueOf((terms.docFreq())));
+ Fields fields = MultiFields.getFields(indexReader);
+ if (fields != null) {
+ FieldsEnum fieldsEnum = fields.iterator();
+ String curField;
+ while((curField = fieldsEnum.next()) != null) {
+ TermsEnum terms = fieldsEnum.terms();
+ BytesRef text;
+ while ((text = terms.next()) != null) {
+ //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
+ //if we're either not looking by field or we're matching the specific field
+ if ((field == null) || field.equals(curField)) {
+ termMap.put(curField + ":" + text.utf8ToString(), Integer.valueOf((terms.docFreq())));
+ }
+ }
+ }
}
Iterator<String> termIterator = termMap.keySet().iterator();
Modified: lucene/dev/branches/realtime_search/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Thu Jul 22 19:34:35 2010
@@ -44,11 +44,8 @@ import org.apache.lucene.index.FieldsEnu
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
-import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.Collector;
@@ -210,7 +207,7 @@ public class MemoryIndex implements Seri
if (o1 instanceof Map.Entry<?,?>) o1 = ((Map.Entry<?,?>) o1).getKey();
if (o2 instanceof Map.Entry<?,?>) o2 = ((Map.Entry<?,?>) o2).getKey();
if (o1 == o2) return 0;
- return ((String) o1).compareTo((String) o2);
+ return ((Comparable) o1).compareTo((Comparable) o2);
}
};
@@ -344,21 +341,19 @@ public class MemoryIndex implements Seri
if (fields.get(fieldName) != null)
throw new IllegalArgumentException("field must not be added more than once");
- HashMap<String,ArrayIntList> terms = new HashMap<String,ArrayIntList>();
+ HashMap<BytesRef,ArrayIntList> terms = new HashMap<BytesRef,ArrayIntList>();
int numTokens = 0;
int numOverlapTokens = 0;
int pos = -1;
- TermToBytesRefAttribute termAtt = stream.addAttribute(TermToBytesRefAttribute.class);
+ TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
BytesRef ref = new BytesRef(10);
stream.reset();
while (stream.incrementToken()) {
termAtt.toBytesRef(ref);
- // TODO: support non-UTF8 strings (like numerics) here
- String term = ref.utf8ToString();
- if (term.length() == 0) continue; // nothing to do
+ if (ref.length == 0) continue; // nothing to do
// if (DEBUG) System.err.println("token='" + term + "'");
numTokens++;
final int posIncr = posIncrAttribute.getPositionIncrement();
@@ -366,10 +361,10 @@ public class MemoryIndex implements Seri
numOverlapTokens++;
pos += posIncr;
- ArrayIntList positions = terms.get(term);
+ ArrayIntList positions = terms.get(ref);
if (positions == null) { // term not seen before
positions = new ArrayIntList(stride);
- terms.put(term, positions);
+ terms.put(new BytesRef(ref), positions);
}
if (stride == 1) {
positions.add(pos);
@@ -493,9 +488,10 @@ public class MemoryIndex implements Seri
int len = info.terms.size();
size += VM.sizeOfHashMap(len);
- Iterator<Map.Entry<String,ArrayIntList>> iter2 = info.terms.entrySet().iterator();
+ Iterator<Map.Entry<BytesRef,ArrayIntList>> iter2 = info.terms.entrySet().iterator();
while (--len >= 0) { // for each term
- Map.Entry<String,ArrayIntList> e = iter2.next();
+ Map.Entry<BytesRef,ArrayIntList> e = iter2.next();
+ // FIXME: this calculation is probably not correct since we use bytes now.
size += VM.sizeOfObject(PTR + 3*INT); // assumes substring() memory overlay
// size += STR + 2 * ((String) e.getKey()).length();
ArrayIntList positions = e.getValue();
@@ -537,7 +533,7 @@ public class MemoryIndex implements Seri
public String toString() {
StringBuilder result = new StringBuilder(256);
sortFields();
- int sumChars = 0;
+ int sumBytes = 0;
int sumPositions = 0;
int sumTerms = 0;
@@ -548,32 +544,32 @@ public class MemoryIndex implements Seri
info.sortTerms();
result.append(fieldName + ":\n");
- int numChars = 0;
+ int numBytes = 0;
int numPositions = 0;
for (int j=0; j < info.sortedTerms.length; j++) {
- Map.Entry<String,ArrayIntList> e = info.sortedTerms[j];
- String term = e.getKey();
+ Map.Entry<BytesRef,ArrayIntList> e = info.sortedTerms[j];
+ BytesRef term = e.getKey();
ArrayIntList positions = e.getValue();
result.append("\t'" + term + "':" + numPositions(positions) + ":");
result.append(positions.toString(stride)); // ignore offsets
result.append("\n");
numPositions += numPositions(positions);
- numChars += term.length();
+ numBytes += term.length;
}
result.append("\tterms=" + info.sortedTerms.length);
result.append(", positions=" + numPositions);
- result.append(", Kchars=" + (numChars/1000.0f));
+ result.append(", Kbytes=" + (numBytes/1000.0f));
result.append("\n");
sumPositions += numPositions;
- sumChars += numChars;
+ sumBytes += numBytes;
sumTerms += info.sortedTerms.length;
}
result.append("\nfields=" + sortedFields.length);
result.append(", terms=" + sumTerms);
result.append(", positions=" + sumPositions);
- result.append(", Kchars=" + (sumChars/1000.0f));
+ result.append(", Kbytes=" + (sumBytes/1000.0f));
return result.toString();
}
@@ -591,10 +587,10 @@ public class MemoryIndex implements Seri
* Term strings and their positions for this field: Map <String
* termText, ArrayIntList positions>
*/
- private final HashMap<String,ArrayIntList> terms;
+ private final HashMap<BytesRef,ArrayIntList> terms;
/** Terms sorted ascending by term text; computed on demand */
- private transient Map.Entry<String,ArrayIntList>[] sortedTerms;
+ private transient Map.Entry<BytesRef,ArrayIntList>[] sortedTerms;
/** Number of added tokens for this field */
private final int numTokens;
@@ -610,7 +606,7 @@ public class MemoryIndex implements Seri
private static final long serialVersionUID = 2882195016849084649L;
- public Info(HashMap<String,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
+ public Info(HashMap<BytesRef,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
this.terms = terms;
this.numTokens = numTokens;
this.numOverlapTokens = numOverlapTokens;
@@ -630,7 +626,7 @@ public class MemoryIndex implements Seri
}
/** note that the frequency can be calculated as numPosition(getPositions(x)) */
- public ArrayIntList getPositions(String term) {
+ public ArrayIntList getPositions(BytesRef term) {
return terms.get(term);
}
@@ -732,7 +728,6 @@ public class MemoryIndex implements Seri
///////////////////////////////////////////////////////////////////////////////
// Nested classes:
///////////////////////////////////////////////////////////////////////////////
- private static final Term MATCH_ALL_TERM = new Term("");
/**
* Search support for Lucene framework integration; implements all methods
@@ -763,18 +758,12 @@ public class MemoryIndex implements Seri
public int docFreq(Term term) {
Info info = getInfo(term.field());
int freq = 0;
- if (info != null) freq = info.getPositions(term.text()) != null ? 1 : 0;
+ if (info != null) freq = info.getPositions(term.bytes()) != null ? 1 : 0;
if (DEBUG) System.err.println("MemoryIndexReader.docFreq: " + term + ", freq:" + freq);
return freq;
}
@Override
- public TermEnum terms() {
- if (DEBUG) System.err.println("MemoryIndexReader.terms()");
- return terms(MATCH_ALL_TERM);
- }
-
- @Override
public Fields fields() {
sortFields();
@@ -818,7 +807,7 @@ public class MemoryIndex implements Seri
@Override
public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUTF16Comparator();
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
@@ -843,8 +832,7 @@ public class MemoryIndex implements Seri
@Override
public SeekStatus seek(BytesRef text, boolean useCache) {
- final String s = text.utf8ToString();
- termUpto = Arrays.binarySearch(info.sortedTerms, s, termComparator);
+ termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
if (termUpto < 0) { // not found; choose successor
termUpto = -termUpto -1;
if (termUpto >= info.sortedTerms.length) {
@@ -913,7 +901,7 @@ public class MemoryIndex implements Seri
@Override
public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUTF16Comparator();
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
}
}
@@ -1011,202 +999,6 @@ public class MemoryIndex implements Seri
}
@Override
- public TermEnum terms(Term term) {
- if (DEBUG) System.err.println("MemoryIndexReader.terms: " + term);
-
- int i; // index into info.sortedTerms
- int j; // index into sortedFields
-
- sortFields();
- if (sortedFields.length == 1 && sortedFields[0].getKey() == term.field()) {
- j = 0; // fast path
- } else {
- j = Arrays.binarySearch(sortedFields, term.field(), termComparator);
- }
-
- if (j < 0) { // not found; choose successor
- j = -j -1;
- i = 0;
- if (j < sortedFields.length) getInfo(j).sortTerms();
- } else { // found
- Info info = getInfo(j);
- info.sortTerms();
- i = Arrays.binarySearch(info.sortedTerms, term.text(), termComparator);
- if (i < 0) { // not found; choose successor
- i = -i -1;
- if (i >= info.sortedTerms.length) { // move to next successor
- j++;
- i = 0;
- if (j < sortedFields.length) getInfo(j).sortTerms();
- }
- }
- }
- final int ix = i;
- final int jx = j;
-
- return new TermEnum() {
-
- private int srtTermsIdx = ix; // index into info.sortedTerms
- private int srtFldsIdx = jx; // index into sortedFields
-
- @Override
- public boolean next() {
- if (DEBUG) System.err.println("TermEnum.next");
- if (srtFldsIdx >= sortedFields.length) return false;
- Info info = getInfo(srtFldsIdx);
- if (++srtTermsIdx < info.sortedTerms.length) return true;
-
- // move to successor
- srtFldsIdx++;
- srtTermsIdx = 0;
- if (srtFldsIdx >= sortedFields.length) return false;
- getInfo(srtFldsIdx).sortTerms();
- return true;
- }
-
- @Override
- public Term term() {
- if (DEBUG) System.err.println("TermEnum.term: " + srtTermsIdx);
- if (srtFldsIdx >= sortedFields.length) return null;
- Info info = getInfo(srtFldsIdx);
- if (srtTermsIdx >= info.sortedTerms.length) return null;
-// if (DEBUG) System.err.println("TermEnum.term: " + i + ", " + info.sortedTerms[i].getKey());
- return createTerm(info, srtFldsIdx, info.sortedTerms[srtTermsIdx].getKey());
- }
-
- @Override
- public int docFreq() {
- if (DEBUG) System.err.println("TermEnum.docFreq");
- if (srtFldsIdx >= sortedFields.length) return 0;
- Info info = getInfo(srtFldsIdx);
- if (srtTermsIdx >= info.sortedTerms.length) return 0;
- return numPositions(info.getPositions(srtTermsIdx));
- }
-
- @Override
- public void close() {
- if (DEBUG) System.err.println("TermEnum.close");
- }
-
- /** Returns a new Term object, minimizing String.intern() overheads. */
- private Term createTerm(Info info, int pos, String text) {
- // Assertion: sortFields has already been called before
- Term template = info.template;
- if (template == null) { // not yet cached?
- String fieldName = sortedFields[pos].getKey();
- template = new Term(fieldName);
- info.template = template;
- }
-
- return template.createTerm(text);
- }
-
- };
- }
-
- @Override
- public TermPositions termPositions() {
- if (DEBUG) System.err.println("MemoryIndexReader.termPositions");
-
- return new TermPositions() {
-
- private boolean hasNext;
- private int cursor = 0;
- private ArrayIntList current;
- private Term term;
-
- public void seek(Term term) {
- this.term = term;
- if (DEBUG) System.err.println(".seek: " + term);
- if (term == null) {
- hasNext = true; // term==null means match all docs
- } else {
- Info info = getInfo(term.field());
- current = info == null ? null : info.getPositions(term.text());
- hasNext = (current != null);
- cursor = 0;
- }
- }
-
- public void seek(TermEnum termEnum) {
- if (DEBUG) System.err.println(".seekEnum");
- seek(termEnum.term());
- }
-
- public int doc() {
- if (DEBUG) System.err.println(".doc");
- return 0;
- }
-
- public int freq() {
- int freq = current != null ? numPositions(current) : (term == null ? 1 : 0);
- if (DEBUG) System.err.println(".freq: " + freq);
- return freq;
- }
-
- public boolean next() {
- if (DEBUG) System.err.println(".next: " + current + ", oldHasNext=" + hasNext);
- boolean next = hasNext;
- hasNext = false;
- return next;
- }
-
- public int read(int[] docs, int[] freqs) {
- if (DEBUG) System.err.println(".read: " + docs.length);
- if (!hasNext) return 0;
- hasNext = false;
- docs[0] = 0;
- freqs[0] = freq();
- return 1;
- }
-
- public boolean skipTo(int target) {
- if (DEBUG) System.err.println(".skipTo: " + target);
- return next();
- }
-
- public void close() {
- if (DEBUG) System.err.println(".close");
- }
-
- public int nextPosition() { // implements TermPositions
- int pos = current.get(cursor);
- cursor += stride;
- if (DEBUG) System.err.println(".nextPosition: " + pos);
- return pos;
- }
-
- /**
- * Not implemented.
- * @throws UnsupportedOperationException
- */
- public int getPayloadLength() {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Not implemented.
- * @throws UnsupportedOperationException
- */
- public byte[] getPayload(byte[] data, int offset) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- public boolean isPayloadAvailable() {
- // unsuported
- return false;
- }
-
- };
- }
-
- @Override
- public TermDocs termDocs() {
- if (DEBUG) System.err.println("MemoryIndexReader.termDocs");
- return termPositions();
- }
-
- @Override
public TermFreqVector[] getTermFreqVectors(int docNumber) {
if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors");
TermFreqVector[] vectors = new TermFreqVector[fields.size()];
@@ -1267,7 +1059,7 @@ public class MemoryIndex implements Seri
return new TermPositionVector() {
- private final Map.Entry<String,ArrayIntList>[] sortedTerms = info.sortedTerms;
+ private final Map.Entry<BytesRef,ArrayIntList>[] sortedTerms = info.sortedTerms;
public String getField() {
return fieldName;
@@ -1277,8 +1069,8 @@ public class MemoryIndex implements Seri
return sortedTerms.length;
}
- public String[] getTerms() {
- String[] terms = new String[sortedTerms.length];
+ public BytesRef[] getTerms() {
+ BytesRef[] terms = new BytesRef[sortedTerms.length];
for (int i=sortedTerms.length; --i >= 0; ) {
terms[i] = sortedTerms[i].getKey();
}
@@ -1293,12 +1085,12 @@ public class MemoryIndex implements Seri
return freqs;
}
- public int indexOf(String term) {
+ public int indexOf(BytesRef term) {
int i = Arrays.binarySearch(sortedTerms, term, termComparator);
return i >= 0 ? i : -1;
}
- public int[] indexesOf(String[] terms, int start, int len) {
+ public int[] indexesOf(BytesRef[] terms, int start, int len) {
int[] indexes = new int[len];
for (int i=0; i < len; i++) {
indexes[i] = indexOf(terms[start++]);
Modified: lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java Thu Jul 22 19:34:35 2010
@@ -69,7 +69,7 @@ public class TermVectorAccessor {
}
/** Instance reused to save garbage collector some time */
- private List<String> tokens;
+ private List<BytesRef> tokens;
/** Instance reused to save garbage collector some time */
private List<int[]> positions;
@@ -91,7 +91,7 @@ public class TermVectorAccessor {
private void build(IndexReader indexReader, String field, TermVectorMapper mapper, int documentNumber) throws IOException {
if (tokens == null) {
- tokens = new ArrayList<String>(500);
+ tokens = new ArrayList<BytesRef>(500);
positions = new ArrayList<int[]>(500);
frequencies = new ArrayList<Integer>(500);
} else {
@@ -122,7 +122,7 @@ public class TermVectorAccessor {
if (docID == documentNumber) {
frequencies.add(Integer.valueOf(docs.freq()));
- tokens.add(text.utf8ToString());
+ tokens.add(new BytesRef(text));
if (!mapper.isIgnoringPositions()) {
int[] positions = new int[docs.freq()];
@@ -173,7 +173,7 @@ public class TermVectorAccessor {
}
@Override
- public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+ public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
decorated.map(term, frequency, offsets, positions);
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java Thu Jul 22 19:34:35 2010
@@ -16,14 +16,16 @@ package org.apache.lucene.misc;
* limitations under the License.
*/
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.Bits;
import java.io.File;
import java.io.IOException;
@@ -109,37 +111,25 @@ public class LengthNormModifier {
String fieldName = StringHelper.intern(field);
int[] termCounts = new int[0];
- IndexReader reader = null;
- TermEnum termEnum = null;
- TermDocs termDocs = null;
+ IndexReader reader = IndexReader.open(dir, false);
try {
- reader = IndexReader.open(dir, false);
+
termCounts = new int[reader.maxDoc()];
- try {
- termEnum = reader.terms(new Term(field));
- try {
- termDocs = reader.termDocs();
- do {
- Term term = termEnum.term();
- if (term != null && term.field().equals(fieldName)) {
- termDocs.seek(termEnum.term());
- while (termDocs.next()) {
- termCounts[termDocs.doc()] += termDocs.freq();
- }
- }
- } while (termEnum.next());
- } finally {
- if (null != termDocs) termDocs.close();
+ Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+ while(termsEnum.next() != null) {
+ docs = termsEnum.docs(delDocs, docs);
+ int doc;
+ while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+ termCounts[doc] += docs.freq();
+ }
}
- } finally {
- if (null != termEnum) termEnum.close();
}
- } finally {
- if (null != reader) reader.close();
- }
-
- try {
- reader = IndexReader.open(dir, false);
+
for (int d = 0; d < termCounts.length; d++) {
if (! reader.isDeleted(d)) {
byte norm = Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d]));
@@ -147,7 +137,7 @@ public class LengthNormModifier {
}
}
} finally {
- if (null != reader) reader.close();
+ reader.close();
}
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/overview.html?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/overview.html (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/overview.html Thu Jul 22 19:34:35 2010
@@ -21,6 +21,57 @@
</title>
</head>
<body>
- miscellaneous
- </body>
-</html>
\ No newline at end of file
+
+<h2>Misc Tools</h2>
+
+The misc package has various tools for splitting/merging indices,
+changing norms, finding high freq terms, and others.
+
+<h2>DirectIOLinuxDirectory</h2>
+
+<p>
+<b>NOTE</b>: This uses C++ sources (accessible via JNI), which you'll
+have to compile on your platform. Further, this is a very
+platform-specific extension (runs only on Linux, and likely only on
+2.6.x kernels).
+
+<p>
+DirectIOLinuxDirectory is a Directory implementation that bypasses the
+OS's buffer cache for any IndexInput and IndexOutput opened through it
+(using the linux-specific O_DIRECT flag).
+
+<p>
+Note that doing so typically results in a significant performance loss! You
+should not use this for searching, but rather for indexing (or maybe
+just merging during indexing), to avoid evicting useful pages from the
+buffer cache.
+
+See <a target=_top href="http://chbits.blogspot.com/2010/06/lucene-and-fadvisemadvise.html">here</a>
+for details.
+
+Steps to build:
+<ul>
+ <li> <tt>cd lucene/contrib/misc/src/java/org/apache/lucene/store</tt>
+
+ <li> Compile NativePosixUtil.cpp -> libNativePosixUtil.so. On linux, something like <tt>gcc -fPIC -o libNativePosixUtil.so -shared -Wl,-soname,libNativePosixUtil.so -I$JAVA_HOME/include -I$JAVA_HOME/include/linux NativePosixUtil.cpp -lc -lstdc++</tt>. Add <tt>-m64</tt> if you want to compile 64bit (and java must be run with -d64 so it knows to load a 64bit dynamic lib).
+
+ <li> Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like <tt>export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH</tt>, where /path/to/dir contains libNativePosixUtil.so)
+
+ <li> <tt>ant jar</tt> to compile the java source and put that JAR on your CLASSPATH
+</ul>
+
+<p>
+To use this, you'll likely want to make a custom subclass of
+FSDirectory that only opens direct IndexInput/Output for merging. One
+hackish way to do this is to check if the current thread's name starts
+with "Lucene Merge Thread". Alternatively, you could use this Dir as
+is for all indexing ops, but not for searching.
+
+<p>
+NativePosixUtil.cpp/java also expose access to the posix_madvise,
+madvise, and posix_fadvise functions, which are somewhat more cross
+platform than O_DIRECT; however, in testing (see above link), these
+APIs did not seem to help prevent buffer cache eviction.
+</body>
+
+</html>
Modified: lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java Thu Jul 22 19:34:35 2010
@@ -22,6 +22,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
public class TestMultiPassIndexSplitter extends LuceneTestCase {
IndexReader input;
@@ -62,30 +63,30 @@ public class TestMultiPassIndexSplitter
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); // rounding error
Document doc = ir.document(0);
assertEquals("0", doc.get("id"));
- Term t;
- TermEnum te;
- t = new Term("id", "1");
- te = ir.terms(t);
- assertNotSame(t, te.term());
+ TermsEnum te = MultiFields.getTerms(ir, "id").iterator();
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1")));
+ assertNotSame("1", te.term().utf8ToString());
ir.close();
ir = IndexReader.open(dirs[1], true);
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1);
doc = ir.document(0);
assertEquals("1", doc.get("id"));
- t = new Term("id", "0");
- te = ir.terms(t);
- assertNotSame(t, te.term());
+ te = MultiFields.getTerms(ir, "id").iterator();
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0")));
+
+ assertNotSame("0", te.term().utf8ToString());
ir.close();
ir = IndexReader.open(dirs[2], true);
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1);
doc = ir.document(0);
assertEquals("2", doc.get("id"));
- t = new Term("id", "1");
- te = ir.terms(t);
- assertNotSame(t, te.term());
- t = new Term("id", "0");
- te = ir.terms(t);
- assertNotSame(t, te.term());
+
+ te = MultiFields.getTerms(ir, "id").iterator();
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1")));
+ assertNotSame("1", te.term());
+
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0")));
+ assertNotSame("0", te.term().utf8ToString());
}
/**
@@ -117,10 +118,9 @@ public class TestMultiPassIndexSplitter
doc = ir.document(0);
assertEquals(start + "", doc.get("id"));
// make sure the deleted doc is not here
- Term t;
- TermEnum te;
- t = new Term("id", (NUM_DOCS - 1) + "");
- te = ir.terms(t);
- assertNotSame(t, te.term());
+ TermsEnum te = MultiFields.getTerms(ir, "id").iterator();
+ Term t = new Term("id", (NUM_DOCS - 1) + "");
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef(t.text())));
+ assertNotSame(t.text(), te.term().utf8ToString());
}
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java Thu Jul 22 19:34:35 2010
@@ -76,21 +76,21 @@ public class TestTermVectorAccessor exte
mapper = new ParallelArrayTermVectorMapper();
accessor.accept(ir, i, "a", mapper);
tfv = mapper.materializeVector();
- assertEquals("doc " + i, "a", tfv.getTerms()[0]);
+ assertEquals("doc " + i, "a", tfv.getTerms()[0].utf8ToString());
assertEquals("doc " + i, 8, tfv.getTermFrequencies()[0]);
mapper = new ParallelArrayTermVectorMapper();
accessor.accept(ir, i, "b", mapper);
tfv = mapper.materializeVector();
assertEquals("doc " + i, 8, tfv.getTermFrequencies().length);
- assertEquals("doc " + i, "b", tfv.getTerms()[1]);
+ assertEquals("doc " + i, "b", tfv.getTerms()[1].utf8ToString());
assertEquals("doc " + i, 7, tfv.getTermFrequencies()[1]);
mapper = new ParallelArrayTermVectorMapper();
accessor.accept(ir, i, "c", mapper);
tfv = mapper.materializeVector();
assertEquals("doc " + i, 8, tfv.getTermFrequencies().length);
- assertEquals("doc " + i, "c", tfv.getTerms()[2]);
+ assertEquals("doc " + i, "c", tfv.getTerms()[2].utf8ToString());
assertEquals("doc " + i, 7, tfv.getTermFrequencies()[2]);
mapper = new ParallelArrayTermVectorMapper();
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java Thu Jul 22 19:34:35 2010
@@ -213,7 +213,7 @@ public class FuzzyLikeThisQuery extends
totalVariantDocFreqs+=fe.docFreq();
float score=boostAtt.getBoost();
if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){
- ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), possibleMatch.utf8ToString()),score,startTerm);
+ ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), new BytesRef(possibleMatch)),score,startTerm);
variantsQ.insertWithOverflow(st);
minScore = variantsQ.top().score; // maintain minScore
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java Thu Jul 22 19:34:35 2010
@@ -24,8 +24,14 @@ import java.util.TreeSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
/**
* Constructs a filter for docs matching any of the terms added to this class.
@@ -52,28 +58,37 @@ public class TermsFilter extends Filter
* @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader)
*/
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException
- {
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
OpenBitSet result=new OpenBitSet(reader.maxDoc());
- TermDocs td = reader.termDocs();
- try
- {
- for (Iterator<Term> iter = terms.iterator(); iter.hasNext();)
- {
- Term term = iter.next();
- td.seek(term);
- while (td.next())
- {
- result.set(td.doc());
- }
- }
+ Fields fields = MultiFields.getFields(reader);
+ BytesRef br = new BytesRef();
+ Bits delDocs = MultiFields.getDeletedDocs(reader);
+ if (fields != null) {
+ String lastField = null;
+ Terms termsC = null;
+ TermsEnum termsEnum = null;
+ DocsEnum docs = null;
+ for (Iterator<Term> iter = terms.iterator(); iter.hasNext();) {
+ Term term = iter.next();
+ if (term.field() != lastField) {
+ termsC = fields.terms(term.field());
+ termsEnum = termsC.iterator();
+ lastField = term.field();
}
- finally
- {
- td.close();
+
+ if (terms != null) {
+ br.copy(term.bytes());
+ if (termsEnum.seek(br) == TermsEnum.SeekStatus.FOUND) {
+ docs = termsEnum.docs(delDocs, docs);
+ while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ result.set(docs.docID());
+ }
+ }
}
- return result;
- }
+ }
+ }
+ return result;
+ }
@Override
public boolean equals(Object obj)
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java Thu Jul 22 19:34:35 2010
@@ -18,7 +18,7 @@ package org.apache.lucene.search.regex;
*/
import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.search.FilteredTermEnum;
+import org.apache.lucene.search.FilteredTermsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;
@@ -60,8 +60,8 @@ public class RegexQuery extends MultiTer
}
@Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new RegexTermEnum(reader, term, regexImpl);
+ protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException {
+ return new RegexTermsEnum(reader, term, regexImpl);
}
@Override
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java Thu Jul 22 19:34:35 2010
@@ -47,6 +47,7 @@ import org.apache.lucene.search.Similari
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
@@ -848,10 +849,10 @@ public final class MoreLikeThis {
*/
private void addTermFrequencies(Map<String,Int> termFreqMap, TermFreqVector vector)
{
- String[] terms = vector.getTerms();
+ BytesRef[] terms = vector.getTerms();
int freqs[]=vector.getTermFrequencies();
for (int j = 0; j < terms.length; j++) {
- String term = terms[j];
+ String term = terms[j].utf8ToString();
if(isNoiseWord(term)){
continue;
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java Thu Jul 22 19:34:35 2010
@@ -24,8 +24,8 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
@@ -38,7 +38,7 @@ public class BooleanFilterTest extends L
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
- IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
+ RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
//Add series of docs with filterable fields : acces rights, prices, dates and "in-stock" flags
@@ -47,12 +47,18 @@ public class BooleanFilterTest extends L
addDoc(writer, "guest", "020", "20050101","Y");
addDoc(writer, "admin", "020", "20050101","Maybe");
addDoc(writer, "admin guest", "030", "20050101","N");
-
- writer.close();
- reader=IndexReader.open(directory, true);
+ reader = writer.getReader();
+ writer.close();
+ }
+
+ @Override
+ protected void tearDown() throws Exception {
+ reader.close();
+ directory.close();
+ super.tearDown();
}
- private void addDoc(IndexWriter writer, String accessRights, String price, String date, String inStock) throws IOException
+ private void addDoc(RandomIndexWriter writer, String accessRights, String price, String date, String inStock) throws IOException
{
Document doc=new Document();
doc.add(new Field("accessRights",accessRights,Field.Store.YES,Field.Index.ANALYZED));
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java Thu Jul 22 19:34:35 2010
@@ -19,12 +19,14 @@ package org.apache.lucene.search;
import java.util.Calendar;
import java.util.GregorianCalendar;
+import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -47,18 +49,22 @@ public class ChainedFilterTest extends L
private RAMDirectory directory;
private IndexSearcher searcher;
+ private IndexReader reader;
private Query query;
// private DateFilter dateFilter; DateFilter was deprecated and removed
private TermRangeFilter dateFilter;
private QueryWrapperFilter bobFilter;
private QueryWrapperFilter sueFilter;
+ private Random random;
+
@Override
protected void setUp() throws Exception {
super.setUp();
+ random = newRandom();
directory = new RAMDirectory();
- IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory,
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
Calendar cal = new GregorianCalendar();
cal.clear();
@@ -73,10 +79,10 @@ public class ChainedFilterTest extends L
cal.add(Calendar.DATE, 1);
}
-
+ reader = writer.getReader();
writer.close();
- searcher = new IndexSearcher(directory, true);
+ searcher = new IndexSearcher(reader);
// query for everything to make life easier
BooleanQuery bq = new BooleanQuery();
@@ -96,6 +102,14 @@ public class ChainedFilterTest extends L
new TermQuery(new Term("owner", "sue")));
}
+ @Override
+ public void tearDown() throws Exception {
+ searcher.close();
+ reader.close();
+ directory.close();
+ super.tearDown();
+ }
+
private ChainedFilter getChainedFilter(Filter[] chain, int[] logic) {
if (logic == null) {
return new ChainedFilter(chain);
@@ -186,10 +200,12 @@ public class ChainedFilterTest extends L
public void testWithCachingFilter() throws Exception {
Directory dir = new RAMDirectory();
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ RandomIndexWriter writer = new RandomIndexWriter(random, dir,
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ IndexReader reader = writer.getReader();
writer.close();
- Searcher searcher = new IndexSearcher(dir, true);
+ Searcher searcher = new IndexSearcher(reader);
Query query = new TermQuery(new Term("none", "none"));
@@ -206,6 +222,9 @@ public class ChainedFilterTest extends L
// throws java.lang.ClassCastException: org.apache.lucene.util.OpenBitSet cannot be cast to java.util.BitSet
searcher.search(new MatchAllDocsQuery(), cf, 1);
+ searcher.close();
+ reader.close();
+ dir.close();
}
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java Thu Jul 22 19:34:35 2010
@@ -24,12 +24,14 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
public class DuplicateFilterTest extends LuceneTestCase {
private static final String KEY_FIELD = "url";
@@ -42,8 +44,8 @@ public class DuplicateFilterTest extends
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
- IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
//Add series of docs with filterable fields : url, text and dates flags
addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
@@ -54,9 +56,13 @@ public class DuplicateFilterTest extends
addDoc(writer, "http://www.bar.com", "Dog uses Lucene", "20050101");
addDoc(writer, "http://lucene.apache.org", "Lucene 2.0 out", "20050101");
addDoc(writer, "http://lucene.apache.org", "Oops. Lucene 2.1 out", "20050102");
-
- writer.close();
- reader=IndexReader.open(directory, true);
+
+ // Until we fix LUCENE-2348, the index must
+ // have only 1 segment:
+ writer.optimize();
+
+ reader = writer.getReader();
+ writer.close();
searcher =new IndexSearcher(reader);
}
@@ -69,7 +75,7 @@ public class DuplicateFilterTest extends
super.tearDown();
}
- private void addDoc(IndexWriter writer, String url, String text, String date) throws IOException
+ private void addDoc(RandomIndexWriter writer, String url, String text, String date) throws IOException
{
Document doc=new Document();
doc.add(new Field(KEY_FIELD,url,Field.Store.YES,Field.Index.NOT_ANALYZED));
@@ -134,11 +140,14 @@ public class DuplicateFilterTest extends
{
Document d=searcher.doc(hits[i].doc);
String url=d.get(KEY_FIELD);
- TermDocs td = reader.termDocs(new Term(KEY_FIELD,url));
+ DocsEnum td = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ KEY_FIELD,
+ new BytesRef(url));
int lastDoc=0;
- while(td.next())
+ while(td.nextDoc() != DocsEnum.NO_MORE_DOCS)
{
- lastDoc=td.doc();
+ lastDoc=td.docID();
}
assertEquals("Duplicate urls should return last doc",lastDoc, hits[i].doc);
}
@@ -155,10 +164,13 @@ public class DuplicateFilterTest extends
{
Document d=searcher.doc(hits[i].doc);
String url=d.get(KEY_FIELD);
- TermDocs td = reader.termDocs(new Term(KEY_FIELD,url));
+ DocsEnum td = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ KEY_FIELD,
+ new BytesRef(url));
int lastDoc=0;
- td.next();
- lastDoc=td.doc();
+ td.nextDoc();
+ lastDoc=td.docID();
assertEquals("Duplicate urls should return first doc",lastDoc, hits[i].doc);
}
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java Thu Jul 22 19:34:35 2010
@@ -24,8 +24,9 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
@@ -33,13 +34,15 @@ import org.apache.lucene.util.LuceneTest
public class FuzzyLikeThisQueryTest extends LuceneTestCase {
private RAMDirectory directory;
private IndexSearcher searcher;
+ private IndexReader reader;
private Analyzer analyzer=new MockAnalyzer();
@Override
protected void setUp() throws Exception {
super.setUp();
directory = new RAMDirectory();
- IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+ RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
+ new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
//Add series of docs with misspelt names
addDoc(writer, "jonathon smythe","1");
@@ -48,12 +51,20 @@ public class FuzzyLikeThisQueryTest exte
addDoc(writer, "johnny smith","4" );
addDoc(writer, "jonny smith","5" );
addDoc(writer, "johnathon smythe","6");
-
+ reader = writer.getReader();
writer.close();
- searcher=new IndexSearcher(directory, true);
+ searcher=new IndexSearcher(reader);
+ }
+
+ @Override
+ protected void tearDown() throws Exception {
+ searcher.close();
+ reader.close();
+ directory.close();
+ super.tearDown();
}
- private void addDoc(IndexWriter writer, String name, String id) throws IOException
+ private void addDoc(RandomIndexWriter writer, String name, String id) throws IOException
{
Document doc=new Document();
doc.add(new Field("name",name,Field.Store.YES,Field.Index.ANALYZED));
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/TermsFilterTest.java Thu Jul 22 19:34:35 2010
@@ -23,8 +23,8 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
@@ -53,16 +53,16 @@ public class TermsFilterTest extends Luc
public void testMissingTerms() throws Exception {
String fieldName="field1";
RAMDirectory rd=new RAMDirectory();
- IndexWriter w = new IndexWriter(rd, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ RandomIndexWriter w = new RandomIndexWriter(newRandom(), rd,
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
for (int i = 0; i < 100; i++) {
Document doc=new Document();
int term=i*10; //terms are units of 10;
doc.add(new Field(fieldName,""+term,Field.Store.YES,Field.Index.NOT_ANALYZED));
w.addDocument(doc);
}
+ IndexReader reader = w.getReader();
w.close();
- IndexReader reader = IndexReader.open(rd, true);
TermsFilter tf=new TermsFilter();
tf.addTerm(new Term(fieldName,"19"));
@@ -80,6 +80,8 @@ public class TermsFilterTest extends Luc
tf.addTerm(new Term(fieldName,"00"));
bits = (OpenBitSet)tf.getDocIdSet(reader);
assertEquals("Must match 2", 2, bits.cardinality());
-
+
+ reader.close();
+ rd.close();
}
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java Thu Jul 22 19:34:35 2010
@@ -17,15 +17,17 @@ package org.apache.lucene.search.regex;
* limitations under the License.
*/
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
@@ -33,30 +35,30 @@ import org.apache.lucene.util.LuceneTest
public class TestRegexQuery extends LuceneTestCase {
private IndexSearcher searcher;
+ private IndexReader reader;
+ private Directory directory;
private final String FN = "field";
@Override
protected void setUp() throws Exception {
super.setUp();
- RAMDirectory directory = new RAMDirectory();
- try {
- IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
- Document doc = new Document();
- doc.add(new Field(FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED));
- writer.addDocument(doc);
- writer.optimize();
- writer.close();
- searcher = new IndexSearcher(directory, true);
- } catch (Exception e) {
- fail(e.toString());
- }
+ directory = new RAMDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ Document doc = new Document();
+ doc.add(new Field(FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ reader = writer.getReader();
+ writer.close();
+ searcher = new IndexSearcher(reader);
}
@Override
protected void tearDown() throws Exception {
searcher.close();
+ reader.close();
+ directory.close();
super.tearDown();
}
@@ -80,10 +82,9 @@ public class TestRegexQuery extends Luce
}
public void testMatchAll() throws Exception {
- TermEnum terms = new RegexQuery(new Term(FN, "jum.")).getEnum(searcher.getIndexReader());
+ TermsEnum terms = new RegexQuery(new Term(FN, "jum.")).getTermsEnum(searcher.getIndexReader());
// no term should match
- assertNull(terms.term());
- assertFalse(terms.next());
+ assertNull(terms.next());
}
public void testRegex1() throws Exception {
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java Thu Jul 22 19:34:35 2010
@@ -28,8 +28,8 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -38,95 +38,94 @@ import org.apache.lucene.store.RAMDirect
import org.apache.lucene.util.LuceneTestCase;
public class TestMoreLikeThis extends LuceneTestCase {
- private RAMDirectory directory;
- private IndexReader reader;
- private IndexSearcher searcher;
-
- @Override
- protected void setUp() throws Exception {
- super.setUp();
- directory = new RAMDirectory();
- IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
-
- // Add series of docs with specific information for MoreLikeThis
- addDoc(writer, "lucene");
- addDoc(writer, "lucene release");
-
- writer.close();
- reader = IndexReader.open(directory, true);
- searcher = new IndexSearcher(reader);
-
- }
-
- @Override
- protected void tearDown() throws Exception {
- reader.close();
- searcher.close();
- directory.close();
- super.tearDown();
- }
-
- private void addDoc(IndexWriter writer, String text) throws IOException {
- Document doc = new Document();
- doc.add(new Field("text", text, Field.Store.YES, Field.Index.ANALYZED));
- writer.addDocument(doc);
- }
-
- public void testBoostFactor() throws Throwable {
- Map<String,Float> originalValues = getOriginalValues();
-
- MoreLikeThis mlt = new MoreLikeThis(
- reader);
- mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
- mlt.setMinDocFreq(1);
- mlt.setMinTermFreq(1);
- mlt.setMinWordLen(1);
- mlt.setFieldNames(new String[] { "text" });
- mlt.setBoost(true);
-
- // this mean that every term boost factor will be multiplied by this
- // number
- float boostFactor = 5;
- mlt.setBoostFactor(boostFactor);
-
- BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
- "lucene release"));
- List<BooleanClause> clauses = query.clauses();
-
- assertEquals("Expected " + originalValues.size() + " clauses.",
- originalValues.size(), clauses.size());
-
- for (int i = 0; i < clauses.size(); i++) {
- BooleanClause clause = clauses.get(i);
- TermQuery tq = (TermQuery) clause.getQuery();
- Float termBoost = originalValues.get(tq.getTerm().text());
- assertNotNull("Expected term " + tq.getTerm().text(), termBoost);
-
- float totalBoost = termBoost.floatValue() * boostFactor;
- assertEquals("Expected boost of " + totalBoost + " for term '"
- + tq.getTerm().text() + "' got " + tq.getBoost(),
- totalBoost, tq.getBoost(), 0.0001);
- }
+ private RAMDirectory directory;
+ private IndexReader reader;
+ private IndexSearcher searcher;
+
+ @Override
+ protected void setUp() throws Exception {
+ super.setUp();
+ directory = new RAMDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(newRandom(), directory,
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+
+ // Add series of docs with specific information for MoreLikeThis
+ addDoc(writer, "lucene");
+ addDoc(writer, "lucene release");
+
+ reader = writer.getReader();
+ writer.close();
+ searcher = new IndexSearcher(reader);
+ }
+
+ @Override
+ protected void tearDown() throws Exception {
+ reader.close();
+ searcher.close();
+ directory.close();
+ super.tearDown();
+ }
+
+ private void addDoc(RandomIndexWriter writer, String text) throws IOException {
+ Document doc = new Document();
+ doc.add(new Field("text", text, Field.Store.YES, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ }
+
+ public void testBoostFactor() throws Throwable {
+ Map<String,Float> originalValues = getOriginalValues();
+
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+ mlt.setMinDocFreq(1);
+ mlt.setMinTermFreq(1);
+ mlt.setMinWordLen(1);
+ mlt.setFieldNames(new String[] {"text"});
+ mlt.setBoost(true);
+
+ // this mean that every term boost factor will be multiplied by this
+ // number
+ float boostFactor = 5;
+ mlt.setBoostFactor(boostFactor);
+
+ BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
+ "lucene release"));
+ List<BooleanClause> clauses = query.clauses();
+
+ assertEquals("Expected " + originalValues.size() + " clauses.",
+ originalValues.size(), clauses.size());
+
+ for (int i = 0; i < clauses.size(); i++) {
+ BooleanClause clause = clauses.get(i);
+ TermQuery tq = (TermQuery) clause.getQuery();
+ Float termBoost = originalValues.get(tq.getTerm().text());
+ assertNotNull("Expected term " + tq.getTerm().text(), termBoost);
+
+ float totalBoost = termBoost.floatValue() * boostFactor;
+ assertEquals("Expected boost of " + totalBoost + " for term '"
+ + tq.getTerm().text() + "' got " + tq.getBoost(), totalBoost, tq
+ .getBoost(), 0.0001);
}
-
- private Map<String,Float> getOriginalValues() throws IOException {
- Map<String,Float> originalValues = new HashMap<String,Float>();
- MoreLikeThis mlt = new MoreLikeThis(reader);
- mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
- mlt.setMinDocFreq(1);
- mlt.setMinTermFreq(1);
- mlt.setMinWordLen(1);
- mlt.setFieldNames(new String[] { "text" });
- mlt.setBoost(true);
- BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
- "lucene release"));
- List<BooleanClause> clauses = query.clauses();
-
- for (int i = 0; i < clauses.size(); i++) {
- BooleanClause clause = clauses.get(i);
- TermQuery tq = (TermQuery) clause.getQuery();
- originalValues.put(tq.getTerm().text(), Float.valueOf(tq.getBoost()));
- }
- return originalValues;
+ }
+
+ private Map<String,Float> getOriginalValues() throws IOException {
+ Map<String,Float> originalValues = new HashMap<String,Float>();
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+ mlt.setMinDocFreq(1);
+ mlt.setMinTermFreq(1);
+ mlt.setMinWordLen(1);
+ mlt.setFieldNames(new String[] {"text"});
+ mlt.setBoost(true);
+ BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
+ "lucene release"));
+ List<BooleanClause> clauses = query.clauses();
+
+ for (int i = 0; i < clauses.size(); i++) {
+ BooleanClause clause = clauses.get(i);
+ TermQuery tq = (TermQuery) clause.getQuery();
+ originalValues.put(tq.getTerm().text(), Float.valueOf(tq.getBoost()));
}
+ return originalValues;
+ }
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java Thu Jul 22 19:34:35 2010
@@ -126,7 +126,7 @@ public class ExtendableQueryParser exten
}
@Override
- protected Query getFieldQuery(final String field, final String queryText)
+ protected Query getFieldQuery(final String field, final String queryText, boolean quoted)
throws ParseException {
final Pair<String,String> splitExtensionField = this.extensions
.splitExtensionField(defaultField, field);
@@ -136,7 +136,7 @@ public class ExtendableQueryParser exten
return extension.parse(new ExtensionQuery(this, splitExtensionField.cur,
queryText));
}
- return super.getFieldQuery(field, queryText);
+ return super.getFieldQuery(field, queryText, quoted);
}
}