You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/11/25 02:21:54 UTC

svn commit: r1206033 - in /lucene/dev/trunk/lucene: ./ src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/

Author: uschindler
Date: Fri Nov 25 01:21:53 2011
New Revision: 1206033

URL: http://svn.apache.org/viewvc?rev=1206033&view=rev
Log:
LUCENE-3595: Fixed FieldCacheRangeFilter and FieldCacheTermsFilter to correctly respect deletions on reopened SegmentReaders. Factored out FieldCacheDocIdSet to be a top-level class

Added:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldValueFilter.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1206033&r1=1206032&r2=1206033&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Nov 25 01:21:53 2011
@@ -663,6 +663,12 @@ New Features
   have at least one or no value at all in a specific field. (Simon Willnauer,
   Uwe Schindler, Robert Muir)
   
+Bug fixes
+
+* LUCENE-3595: Fixed FieldCacheRangeFilter and FieldCacheTermsFilter
+  to correctly respect deletions on reopened SegmentReaders. Factored out
+  FieldCacheDocIdSet to be a top-level class.  (Uwe Schindler, Simon Willnauer)
+
 ======================= Lucene 3.5.0 =======================
 
 Changes in backwards compatibility policy

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java?rev=1206033&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java Fri Nov 25 01:21:53 2011
@@ -0,0 +1,153 @@
+package org.apache.lucene.search;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.OpenBitSet;
+
+/**
+ * Base class for DocIdSet to be used with FieldCache. The implementation
+ * of its iterator is very stupid and slow if the implementation of the
+ * {@link #matchDoc} method is not optimized, as iterators simply increment
+ * the document id until {@code matchDoc(int)} returns true. Because of this
+ * {@code matchDoc(int)} must be as fast as possible and in no case do any
+ * I/O.
+ * @lucene.internal
+ */
+public abstract class FieldCacheDocIdSet extends DocIdSet {
+
+  protected final int maxDoc;
+  protected final Bits acceptDocs;
+
+  public FieldCacheDocIdSet(int maxDoc, Bits acceptDocs) {
+    this.maxDoc = maxDoc;
+    this.acceptDocs = acceptDocs;
+  }
+
+  /**
+   * this method checks, if a doc is a hit
+   */
+  protected abstract boolean matchDoc(int doc);
+
+  /**
+   * this DocIdSet is always cacheable (does not go back
+   * to the reader for iteration)
+   */
+  @Override
+  public final boolean isCacheable() {
+    return true;
+  }
+
+  @Override
+  public final Bits bits() {
+    return (acceptDocs == null) ? new Bits() {
+      public boolean get(int docid) {
+        return matchDoc(docid);
+      }
+
+      public int length() {
+        return maxDoc;
+      }
+    } : new Bits() {
+      public boolean get(int docid) {
+        return acceptDocs.get(docid) && matchDoc(docid);
+      }
+
+      public int length() {
+        return maxDoc;
+      }
+    };
+  }
+
+  @Override
+  public final DocIdSetIterator iterator() throws IOException {
+    if (acceptDocs == null) {
+      // Specialization optimization disregard acceptDocs
+      return new DocIdSetIterator() {
+        private int doc = -1;
+        
+        @Override
+        public int docID() {
+          return doc;
+        }
+      
+        @Override
+        public int nextDoc() {
+          do {
+            doc++;
+            if (doc >= maxDoc) {
+              return doc = NO_MORE_DOCS;
+            }
+          } while (!matchDoc(doc));
+          return doc;
+        }
+      
+        @Override
+        public int advance(int target) {
+          for(doc=target; doc<maxDoc; doc++) {
+            if (matchDoc(doc)) {
+              return doc;
+            }
+          }
+          return doc = NO_MORE_DOCS;
+        }
+      };
+    } else if (acceptDocs instanceof FixedBitSet || acceptDocs instanceof OpenBitSet) {
+      // special case for FixedBitSet / OpenBitSet: use the iterator and filter it
+      // (used e.g. when Filters are chained by FilteredQuery)
+      return new FilteredDocIdSetIterator(((DocIdSet) acceptDocs).iterator()) {
+        @Override
+        protected boolean match(int doc) {
+          return FieldCacheDocIdSet.this.matchDoc(doc);
+        }
+      };
+    } else {
+      // Stupid consultation of acceptDocs and matchDoc()
+      return new DocIdSetIterator() {
+        private int doc = -1;
+        
+        @Override
+        public int docID() {
+          return doc;
+        }
+      
+        @Override
+        public int nextDoc() {
+          do {
+            doc++;
+            if (doc >= maxDoc) {
+              return doc = NO_MORE_DOCS;
+            }
+          } while (!acceptDocs.get(doc) || !matchDoc(doc));
+          return doc;
+        }
+      
+        @Override
+        public int advance(int target) {
+          for(doc=target; doc<maxDoc; doc++) {
+            if (acceptDocs.get(doc) && matchDoc(doc)) {
+              return doc;
+            }
+          }
+          return doc = NO_MORE_DOCS;
+        }
+      };
+    }
+  }
+}

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java?rev=1206033&r1=1206032&r2=1206033&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java Fri Nov 25 01:21:53 2011
@@ -124,7 +124,7 @@ public abstract class FieldCacheRangeFil
         
         return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
           @Override
-          final boolean matchDoc(int doc) {
+          protected final boolean matchDoc(int doc) {
             final int docOrd = fcsi.getOrd(doc);
             return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint;
           }
@@ -175,7 +175,7 @@ public abstract class FieldCacheRangeFil
         final byte[] values = FieldCache.DEFAULT.getBytes(context.reader, field, (FieldCache.ByteParser) parser, false);
         return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -225,7 +225,7 @@ public abstract class FieldCacheRangeFil
         final short[] values = FieldCache.DEFAULT.getShorts(context.reader, field, (FieldCache.ShortParser) parser, false);
         return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -275,7 +275,7 @@ public abstract class FieldCacheRangeFil
         final int[] values = FieldCache.DEFAULT.getInts(context.reader, field, (FieldCache.IntParser) parser, false);
         return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -325,7 +325,7 @@ public abstract class FieldCacheRangeFil
         final long[] values = FieldCache.DEFAULT.getLongs(context.reader, field, (FieldCache.LongParser) parser, false);
         return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -379,7 +379,7 @@ public abstract class FieldCacheRangeFil
         final float[] values = FieldCache.DEFAULT.getFloats(context.reader, field, (FieldCache.FloatParser) parser, false);
         return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -434,7 +434,7 @@ public abstract class FieldCacheRangeFil
         // ignore deleted docs if range doesn't contain 0
         return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -497,122 +497,4 @@ public abstract class FieldCacheRangeFil
   
   /** Returns the current numeric parser ({@code null} for {@code T} is {@code String}} */
   public FieldCache.Parser getParser() { return parser; }
-  
-  static abstract class FieldCacheDocIdSet extends DocIdSet {
-    private final int maxDoc;
-    private final Bits acceptDocs;
-
-    FieldCacheDocIdSet(int maxDoc, Bits acceptDocs) {
-      this.maxDoc = maxDoc;
-      this.acceptDocs = acceptDocs;
-    }
-
-    /**
-     * this method checks, if a doc is a hit, should throw AIOBE, when position
-     * invalid
-     */
-    abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException;
-
-    /**
-     * this DocIdSet is always cacheable (does not go back
-     * to the reader for iteration)
-     */
-    @Override
-    public boolean isCacheable() {
-      return true;
-    }
-
-    @Override
-    public Bits bits() {
-      return (acceptDocs == null) ? new Bits() {
-        public boolean get(int docid) {
-          return FieldCacheDocIdSet.this.matchDoc(docid);
-        }
-
-        public int length() {
-          return FieldCacheDocIdSet.this.maxDoc;
-        }
-      } : new Bits() {
-        public boolean get(int docid) {
-          return acceptDocs.get(docid) && FieldCacheDocIdSet.this.matchDoc(docid);
-        }
-
-        public int length() {
-          return FieldCacheDocIdSet.this.maxDoc;
-        }
-      };
-    }
-
-    @Override
-    public DocIdSetIterator iterator() throws IOException {
-      if (acceptDocs == null) {
-        // Specialization optimization disregard deletions
-        return new DocIdSetIterator() {
-          private int doc = -1;
-          @Override
-            public int docID() {
-            return doc;
-          }
-        
-          @Override
-          public int nextDoc() {
-            try {
-              do {
-                doc++;
-              } while (!matchDoc(doc));
-              return doc;
-            } catch (ArrayIndexOutOfBoundsException e) {
-              return doc = NO_MORE_DOCS;
-            }
-          }
-        
-          @Override
-          public int advance(int target) {
-            try {
-              doc = target;
-              while (!matchDoc(doc)) {
-                doc++;
-              }
-              return doc;
-            } catch (ArrayIndexOutOfBoundsException e) {
-              return doc = NO_MORE_DOCS;
-            }
-          }
-        };
-      } else {
-        // Must consult acceptDocs
-
-        // a DocIdSetIterator generating docIds by
-        // incrementing a variable & checking acceptDocs -
-        return new DocIdSetIterator() {
-          private int doc = -1;
-          @Override
-            public int docID() {
-            return doc;
-          }
-        
-          @Override
-          public int nextDoc() {
-            do {
-              doc++;
-              if (doc >= maxDoc) {
-                return doc = NO_MORE_DOCS;
-              }
-            } while (!acceptDocs.get(doc) || !matchDoc(doc));
-            return doc;
-          }
-        
-          @Override
-          public int advance(int target) {
-            for(doc=target;doc<maxDoc;doc++) {
-              if (acceptDocs.get(doc) && matchDoc(doc)) {
-                return doc;
-              }
-            }
-            return doc = NO_MORE_DOCS;
-          }
-        };
-      }
-    }
-  }
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java?rev=1206033&r1=1206032&r2=1206033&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java Fri Nov 25 01:21:53 2011
@@ -127,10 +127,9 @@ public class FieldCacheTermsFilter exten
         bits.set(termNumber);
       }
     }
-    final int maxDoc = context.reader.maxDoc();
-    return new FieldCacheRangeFilter.FieldCacheDocIdSet(maxDoc, acceptDocs) {
+    return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
       @Override
-      boolean matchDoc(int doc) {
+      protected final boolean matchDoc(int doc) {
         return bits.get(fcsi.getOrd(doc));
       }
     };

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldValueFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldValueFilter.java?rev=1206033&r1=1206032&r2=1206033&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldValueFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldValueFilter.java Fri Nov 25 01:21:53 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.search.FieldCacheRangeFilter.FieldCacheDocIdSet;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.Bits.MatchAllBits;
 import org.apache.lucene.util.Bits.MatchNoBits;
@@ -67,14 +66,9 @@ public class FieldValueFilter extends Fi
       if (docsWithField instanceof MatchAllBits) {
         return null;
       }
-      final int maxDoc = context.reader.maxDoc();
-      return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
+      return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
         @Override
-        final boolean matchDoc(int doc) {
-          if (doc >= maxDoc) {
-            // TODO: this makes no sense we should check this on the caller level
-            throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
-          }
+        protected final boolean matchDoc(int doc) {
           return !docsWithField.get(doc);
         }
       };
@@ -87,14 +81,9 @@ public class FieldValueFilter extends Fi
         // :-)
         return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs);
       }
-      final int maxDoc = context.reader.maxDoc();
-      return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
+      return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
         @Override
-        final boolean matchDoc(int doc) {
-          if (doc >= maxDoc) {
-            // TODO: this makes no sense we should check this on the caller level
-            throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
-          }
+        protected final boolean matchDoc(int doc) {
           return docsWithField.get(doc);
         }
       };
@@ -131,7 +120,7 @@ public class FieldValueFilter extends Fi
 
   @Override
   public String toString() {
-    return "NoFieldValueFilter [field=" + field + ", negate=" + negate + "]";
+    return "FieldValueFilter [field=" + field + ", negate=" + negate + "]";
   }
 
 }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1206033&r1=1206032&r2=1206033&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java Fri Nov 25 01:21:53 2011
@@ -137,10 +137,9 @@ public final class FieldCacheRewriteMeth
         return DocIdSet.EMPTY_DOCIDSET;
       }
       
-      final int maxDoc = context.reader.maxDoc();
-      return new FieldCacheRangeFilter.FieldCacheDocIdSet(maxDoc, acceptDocs) {
+      return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
         @Override
-        boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
+        protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
           return termSet.get(fcsi.getOrd(doc));
         }
       };