You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/04 19:22:51 UTC

svn commit: r1055130 - in /lucene/dev/trunk/lucene: contrib/ contrib/queries/src/java/org/apache/lucene/search/ contrib/queries/src/test/org/apache/lucene/search/ src/java/org/apache/lucene/search/cache/ src/test/org/apache/lucene/search/

Author: rmuir
Date: Tue Jan  4 18:22:51 2011
New Revision: 1055130

URL: http://svn.apache.org/viewvc?rev=1055130&view=rev
Log:
LUCENE-2836: add FieldCacheRewriteMethod

Added:
    lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java   (with props)
    lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java   (with props)
Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1055130&r1=1055129&r2=1055130&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue Jan  4 18:22:51 2011
@@ -38,6 +38,9 @@ New Features
     that doesn't synchronize on the file handle. This can be useful to 
     avoid the performance problems of SimpleFSDirectory and NIOFSDirectory.
     (Robert Muir, Simon Willnauer, Uwe Schindler, Michael McCandless)
+
+  * LUCENE-2836: Add FieldCacheRewriteMethod, which rewrites MultiTermQueries
+    using the FieldCache's TermsEnum.  (Robert Muir)
   
 API Changes
 

Added: lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1055130&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java (added)
+++ lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java Tue Jan  4 18:22:51 2011
@@ -0,0 +1,153 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+/**
+ * Rewrites MultiTermQueries into a filter, using the FieldCache for term enumeration.
+ * <p>
+ * WARNING: This is only appropriate for single-valued unanalyzed fields. Additionally, for 
+ * most queries this method is actually SLOWER than using the default CONSTANT_SCORE_AUTO 
+ * in MultiTermQuery. This method is only faster than other methods for certain queries,
+ * such as ones that enumerate many terms.
+ * 
+ * @lucene.experimental
+ */
+public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod {
+  
+  @Override
+  public Query rewrite(IndexReader reader, MultiTermQuery query) {
+    Query result = new ConstantScoreQuery(new MultiTermQueryFieldCacheWrapperFilter(query));
+    result.setBoost(query.getBoost());
+    return result;
+  }
+  
+  static class MultiTermQueryFieldCacheWrapperFilter extends Filter {
+    
+    protected final MultiTermQuery query;
+    
+    /**
+     * Wrap a {@link MultiTermQuery} as a Filter.
+     */
+    protected MultiTermQueryFieldCacheWrapperFilter(MultiTermQuery query) {
+      this.query = query;
+    }
+    
+    @Override
+    public String toString() {
+      // query.toString should be ok for the filter, too, if the query boost is 1.0f
+      return query.toString();
+    }
+    
+    @Override
+    public final boolean equals(final Object o) {
+      if (o==this) return true;
+      if (o==null) return false;
+      if (this.getClass().equals(o.getClass())) {
+        return this.query.equals( ((MultiTermQueryFieldCacheWrapperFilter)o).query );
+      }
+      return false;
+    }
+    
+    @Override
+    public final int hashCode() {
+      return query.hashCode();
+    }
+    
+    /** Returns the field name for this query */
+    public final String getField() { return query.getField(); }
+    
+    /**
+     * Expert: Return the number of unique terms visited during execution of the filter.
+     * If there are many of them, you may consider using another filter type
+     * or optimizing the total term count in your index.
+     * <p>This method is not thread safe; be sure to call it only when no filter is running!
+     * If you re-use the same filter instance for another
+     * search, be sure to first reset the term counter
+     * with {@link #clearTotalNumberOfTerms}.
+     * @see #clearTotalNumberOfTerms
+     */
+    public int getTotalNumberOfTerms() {
+      return query.getTotalNumberOfTerms();
+    }
+    
+    /**
+     * Expert: Resets the counting of unique terms.
+     * Do this before executing the filter.
+     * @see #getTotalNumberOfTerms
+     */
+    public void clearTotalNumberOfTerms() {
+      query.clearTotalNumberOfTerms();
+    }
+    
+    /**
+     * Returns a DocIdSet with documents that should be permitted in search
+     * results.
+     */
+    @Override
+    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+      final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(reader, query.field);
+      final OpenBitSet termSet = new OpenBitSet(fcsi.numOrd());
+      TermsEnum termsEnum = query.getTermsEnum(new Terms() {
+        
+        @Override
+        public Comparator<BytesRef> getComparator() throws IOException {
+          return BytesRef.getUTF8SortedAsUnicodeComparator();
+        }
+        
+        @Override
+        public TermsEnum iterator() throws IOException {
+          return fcsi.getTermsEnum();
+        }
+        
+      });
+      
+      assert termsEnum != null;
+      if (termsEnum.next() != null) {
+        // fill into an OpenBitSet
+        int termCount = 0;
+        do {
+          long ord = termsEnum.ord();
+          if (ord > 0) {
+            termSet.fastSet(ord);
+            termCount++;
+          }
+        } while (termsEnum.next() != null);
+        
+        query.incTotalNumberOfTerms(termCount);
+      } else {
+        return DocIdSet.EMPTY_DOCIDSET;
+      }
+      
+      return new FieldCacheRangeFilter.FieldCacheDocIdSet(reader, true) {
+        @Override
+        boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
+          return termSet.fastGet(fcsi.getOrd(doc));
+        }
+      };
+    }
+  }
+}

Added: lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java?rev=1055130&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java (added)
+++ lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/TestFieldCacheRewriteMethod.java Tue Jan  4 18:22:51 2011
@@ -0,0 +1,44 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.automaton.RegExp;
+
+/**
+ * Tests the FieldCacheRewriteMethod with random regular expressions
+ */
+public class TestFieldCacheRewriteMethod extends TestRegexpRandom2 {
+  
+  /** Test fieldcache rewrite against filter rewrite */
+  @Override
+  protected void assertSame(String regexp) throws IOException {   
+    RegexpQuery fieldCache = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
+    fieldCache.setRewriteMethod(new FieldCacheRewriteMethod());
+    
+    RegexpQuery filter = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
+    filter.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    
+    TopDocs fieldCacheDocs = searcher.search(fieldCache, 25);
+    TopDocs filterDocs = searcher.search(filter, 25);
+
+    CheckHits.checkEqual(fieldCache, fieldCacheDocs.scoreDocs, filterDocs.scoreDocs);
+  }
+}

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java?rev=1055130&r1=1055129&r2=1055130&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Tue Jan  4 18:22:51 2011
@@ -240,8 +240,28 @@ public class DocTermsIndexCreator extend
 
       @Override
       public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
-        // TODO - we can support with binary search
-        throw new UnsupportedOperationException();
+        int low = 1;
+        int high = numOrd-1;
+        
+        while (low <= high) {
+          int mid = (low + high) >>> 1;
+          seek(mid);
+          int cmp = term.compareTo(text);
+
+          if (cmp < 0)
+            low = mid + 1;
+          else if (cmp > 0)
+            high = mid - 1;
+          else
+            return SeekStatus.FOUND; // key found
+        }
+        
+        if (low == numOrd) {
+          return SeekStatus.END;
+        } else {
+          seek(low);
+          return SeekStatus.NOT_FOUND;
+        }
       }
 
       @Override
@@ -315,7 +335,7 @@ public class DocTermsIndexCreator extend
 
       @Override
       public Comparator<BytesRef> getComparator() throws IOException {
-        throw new UnsupportedOperationException();
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
       }
     }
   }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1055130&r1=1055129&r2=1055130&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java Tue Jan  4 18:22:51 2011
@@ -180,7 +180,14 @@ public class TestFieldCache extends Luce
       assertEquals(val2, val1);
     }
 
-
+    // seek the enum around (note this isn't a great test here)
+    for (int i = 0; i < 100 * RANDOM_MULTIPLIER; i++) {
+      int k = _TestUtil.nextInt(random, 1, nTerms-1);
+      BytesRef val1 = termsIndex.lookup(k, val);
+      assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seek(val1));
+      assertEquals(val1, tenum.term());
+    }
+    
     // test bad field
     termsIndex = cache.getTermsIndex(reader, "bogusfield");
 

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java?rev=1055130&r1=1055129&r2=1055130&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java Tue Jan  4 18:22:51 2011
@@ -49,7 +49,7 @@ import org.apache.lucene.util.automaton.
  * Generates random regexps, and validates against a simple impl.
  */
 public class TestRegexpRandom2 extends LuceneTestCase {
-  private IndexSearcher searcher;
+  protected IndexSearcher searcher;
   private IndexReader reader;
   private Directory dir;
   
@@ -146,7 +146,7 @@ public class TestRegexpRandom2 extends L
   /** check that the # of hits is the same as from a very
    * simple regexpquery implementation.
    */
-  private void assertSame(String regexp) throws IOException {   
+  protected void assertSame(String regexp) throws IOException {   
     RegexpQuery smart = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
     DumbRegexpQuery dumb = new DumbRegexpQuery(new Term("field", regexp), RegExp.NONE);