You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/04 19:22:51 UTC
svn commit: r1055130 - in /lucene/dev/trunk/lucene: contrib/
contrib/queries/src/java/org/apache/lucene/search/
contrib/queries/src/test/org/apache/lucene/search/
src/java/org/apache/lucene/search/cache/ src/test/org/apache/lucene/search/
Author: rmuir
Date: Tue Jan 4 18:22:51 2011
New Revision: 1055130
URL: http://svn.apache.org/viewvc?rev=1055130&view=rev
Log:
LUCENE-2836: add FieldCacheRewriteMethod
Added:
lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java (with props)
lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java (with props)
Modified:
lucene/dev/trunk/lucene/contrib/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1055130&r1=1055129&r2=1055130&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue Jan 4 18:22:51 2011
@@ -38,6 +38,9 @@ New Features
that doesn't synchronize on the file handle. This can be useful to
avoid the performance problems of SimpleFSDirectory and NIOFSDirectory.
(Robert Muir, Simon Willnauer, Uwe Schindler, Michael McCandless)
+
+ * LUCENE-2836: Add FieldCacheRewriteMethod, which rewrites MultiTermQueries
+ using the FieldCache's TermsEnum. (Robert Muir)
API Changes
Added: lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1055130&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java (added)
+++ lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java Tue Jan 4 18:22:51 2011
@@ -0,0 +1,153 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+/**
+ * Rewrites MultiTermQueries into a filter, using the FieldCache for term enumeration.
+ * <p>
+ * WARNING: This is only appropriate for single-valued unanalyzed fields. Additionally, for
+ * most queries this method is actually SLOWER than using the default CONSTANT_SCORE_AUTO
+ * in MultiTermQuery. This method is only faster than other methods for certain queries,
+ * such as ones that enumerate many terms.
+ *
+ * @lucene.experimental
+ */
+public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod {
+
+ @Override
+ public Query rewrite(IndexReader reader, MultiTermQuery query) {
+ Query result = new ConstantScoreQuery(new MultiTermQueryFieldCacheWrapperFilter(query));
+ result.setBoost(query.getBoost());
+ return result;
+ }
+
+ static class MultiTermQueryFieldCacheWrapperFilter extends Filter {
+
+ protected final MultiTermQuery query;
+
+ /**
+ * Wrap a {@link MultiTermQuery} as a Filter.
+ */
+ protected MultiTermQueryFieldCacheWrapperFilter(MultiTermQuery query) {
+ this.query = query;
+ }
+
+ @Override
+ public String toString() {
+ // query.toString should be ok for the filter, too, if the query boost is 1.0f
+ return query.toString();
+ }
+
+ @Override
+ public final boolean equals(final Object o) {
+ if (o==this) return true;
+ if (o==null) return false;
+ if (this.getClass().equals(o.getClass())) {
+ return this.query.equals( ((MultiTermQueryFieldCacheWrapperFilter)o).query );
+ }
+ return false;
+ }
+
+ @Override
+ public final int hashCode() {
+ return query.hashCode();
+ }
+
+ /** Returns the field name for this query */
+ public final String getField() { return query.getField(); }
+
+ /**
+ * Expert: Return the number of unique terms visited during execution of the filter.
+ * If there are many of them, you may consider using another filter type
+ * or optimize your total term count in index.
+ * <p>This method is not thread safe, be sure to only call it when no filter is running!
+ * If you re-use the same filter instance for another
+ * search, be sure to first reset the term counter
+ * with {@link #clearTotalNumberOfTerms}.
+ * @see #clearTotalNumberOfTerms
+ */
+ public int getTotalNumberOfTerms() {
+ return query.getTotalNumberOfTerms();
+ }
+
+ /**
+ * Expert: Resets the counting of unique terms.
+ * Do this before executing the filter.
+ * @see #getTotalNumberOfTerms
+ */
+ public void clearTotalNumberOfTerms() {
+ query.clearTotalNumberOfTerms();
+ }
+
+ /**
+ * Returns a DocIdSet with documents that should be permitted in search
+ * results.
+ */
+ @Override
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(reader, query.field);
+ final OpenBitSet termSet = new OpenBitSet(fcsi.numOrd());
+ TermsEnum termsEnum = query.getTermsEnum(new Terms() {
+
+ @Override
+ public Comparator<BytesRef> getComparator() throws IOException {
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
+ }
+
+ @Override
+ public TermsEnum iterator() throws IOException {
+ return fcsi.getTermsEnum();
+ }
+
+ });
+
+ assert termsEnum != null;
+ if (termsEnum.next() != null) {
+ // fill into a OpenBitSet
+ int termCount = 0;
+ do {
+ long ord = termsEnum.ord();
+ if (ord > 0) {
+ termSet.fastSet(ord);
+ termCount++;
+ }
+ } while (termsEnum.next() != null);
+
+ query.incTotalNumberOfTerms(termCount);
+ } else {
+ return DocIdSet.EMPTY_DOCIDSET;
+ }
+
+ return new FieldCacheRangeFilter.FieldCacheDocIdSet(reader, true) {
+ @Override
+ boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
+ return termSet.fastGet(fcsi.getOrd(doc));
+ }
+ };
+ }
+ }
+}
Added: lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java?rev=1055130&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java (added)
+++ lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java Tue Jan 4 18:22:51 2011
@@ -0,0 +1,44 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.automaton.RegExp;
+
+/**
+ * Tests the FieldCacheRewriteMethod with random regular expressions
+ */
+public class TestFieldCacheRewriteMethod extends TestRegexpRandom2 {
+
+  /** Builds a regexp query on "field" that rewrites with the given method. */
+  private static RegexpQuery newRegexpQuery(String regexp, MultiTermQuery.RewriteMethod method) {
+    final RegexpQuery q = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
+    q.setRewriteMethod(method);
+    return q;
+  }
+
+  /**
+   * Runs the same regexp once through the fieldcache rewrite and once through the
+   * constant-score filter rewrite, and checks that both return the same hits.
+   */
+  @Override
+  protected void assertSame(String regexp) throws IOException {
+    final RegexpQuery viaFieldCache = newRegexpQuery(regexp, new FieldCacheRewriteMethod());
+    final RegexpQuery viaFilter =
+        newRegexpQuery(regexp, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+
+    final TopDocs actual = searcher.search(viaFieldCache, 25);
+    final TopDocs expected = searcher.search(viaFilter, 25);
+
+    CheckHits.checkEqual(viaFieldCache, actual.scoreDocs, expected.scoreDocs);
+  }
+}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java?rev=1055130&r1=1055129&r2=1055130&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Tue Jan 4 18:22:51 2011
@@ -240,8 +240,28 @@ public class DocTermsIndexCreator extend
@Override
public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
- // TODO - we can support with binary search
- throw new UnsupportedOperationException();
+ int low = 1;
+ int high = numOrd-1;
+
+ while (low <= high) {
+ int mid = (low + high) >>> 1;
+ seek(mid);
+ int cmp = term.compareTo(text);
+
+ if (cmp < 0)
+ low = mid + 1;
+ else if (cmp > 0)
+ high = mid - 1;
+ else
+ return SeekStatus.FOUND; // key found
+ }
+
+ if (low == numOrd) {
+ return SeekStatus.END;
+ } else {
+ seek(low);
+ return SeekStatus.NOT_FOUND;
+ }
}
@Override
@@ -315,7 +335,7 @@ public class DocTermsIndexCreator extend
@Override
public Comparator<BytesRef> getComparator() throws IOException {
- throw new UnsupportedOperationException();
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
}
}
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1055130&r1=1055129&r2=1055130&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java Tue Jan 4 18:22:51 2011
@@ -180,7 +180,14 @@ public class TestFieldCache extends Luce
assertEquals(val2, val1);
}
-
+ // seek the enum around (note this isn't a great test here)
+ for (int i = 0; i < 100 * RANDOM_MULTIPLIER; i++) {
+ int k = _TestUtil.nextInt(random, 1, nTerms-1);
+ BytesRef val1 = termsIndex.lookup(k, val);
+ assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seek(val1));
+ assertEquals(val1, tenum.term());
+ }
+
// test bad field
termsIndex = cache.getTermsIndex(reader, "bogusfield");
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java?rev=1055130&r1=1055129&r2=1055130&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java Tue Jan 4 18:22:51 2011
@@ -49,7 +49,7 @@ import org.apache.lucene.util.automaton.
* Generates random regexps, and validates against a simple impl.
*/
public class TestRegexpRandom2 extends LuceneTestCase {
- private IndexSearcher searcher;
+ protected IndexSearcher searcher;
private IndexReader reader;
private Directory dir;
@@ -146,7 +146,7 @@ public class TestRegexpRandom2 extends L
/** check that the # of hits is the same as from a very
* simple regexpquery implementation.
*/
- private void assertSame(String regexp) throws IOException {
+ protected void assertSame(String regexp) throws IOException {
RegexpQuery smart = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
DumbRegexpQuery dumb = new DumbRegexpQuery(new Term("field", regexp), RegExp.NONE);