You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/02/03 18:53:47 UTC
svn commit: r740361 - in /lucene/java/trunk: CHANGES.txt
src/java/org/apache/lucene/search/FilteredDocIdSet.java
src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java
src/test/org/apache/lucene/search/TestDocIdSet.java
Author: mikemccand
Date: Tue Feb 3 17:53:46 2009
New Revision: 740361
URL: http://svn.apache.org/viewvc?rev=740361&view=rev
Log:
LUCENE-1506: add FilteredDocIdSet
Added:
lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSet.java (with props)
lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java (with props)
lucene/java/trunk/src/test/org/apache/lucene/search/TestDocIdSet.java (with props)
Modified:
lucene/java/trunk/CHANGES.txt
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=740361&r1=740360&r2=740361&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Feb 3 17:53:46 2009
@@ -149,6 +149,13 @@
reopen you can change the readOnly of the original reader. (Jason
Rutherglen, Mike McCandless)
+14. LUCENE-1506: Added FilteredDocIdSet, an abstract class which you
+ subclass to implement the "match" method to aceept or reject each
+ docID. Unlike ChainedFilter (under contrib/misc),
+ FilteredDocIdSet never requires you to materialize the full
+ bitset. Instead, match() is called on demand per docID. (John
+ Wang via Mike McCandless)
+
Optimizations
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
Added: lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSet.java?rev=740361&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSet.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSet.java Tue Feb 3 17:53:46 2009
@@ -0,0 +1,72 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/**
+ * Abstract decorator class for a DocIdSet implementation
+ * that provides on-demand filtering/validation
+ * mechanism on a given DocIdSet.
+ *
+ * <p/>
+ *
+ * Technically, this same functionality could be achieved
+ * with ChainedFilter (under contrib/misc), however the
+ * benefit of this class is it never materializes the full
+ * bitset for the filter. Instead, the {@link #match}
+ * method is invoked on-demand, per docID visited during
+ * searching. If you know few docIDs will be visited, and
+ * the logic behind {@link #match} is relatively costly,
+ * this may be a better way to filter than ChainedFilter.
+ *
+ * @see DocIdSet
+ */
+
+public abstract class FilteredDocIdSet extends DocIdSet {
+ private final DocIdSet _innerSet;
+
+ /**
+ * Constructor.
+ * @param innerSet Underlying DocIdSet
+ */
+ public FilteredDocIdSet(DocIdSet innerSet) {
+ _innerSet = innerSet;
+ }
+
+ /**
+ * Validation method to determine whether a docid should be in the result set.
+ * @param docid docid to be tested
+ * @return true if input docid should be in the result set, false otherwise.
+ */
+ protected abstract boolean match(int docid);
+
+ /**
+ * Implementation of the contract to build a DocIdSetIterator.
+ * @see DocIdSetIterator
+ * @see FilteredDocIdSetIterator
+ */
+ // @Override
+ public DocIdSetIterator iterator() throws IOException {
+ return new FilteredDocIdSetIterator(_innerSet.iterator()) {
+ protected boolean match(int docid) {
+ return FilteredDocIdSet.this.match(docid);
+ }
+ };
+ }
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSet.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java?rev=740361&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java Tue Feb 3 17:53:46 2009
@@ -0,0 +1,91 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/**
+ * Abstract decorator class of a DocIdSetIterator
+ * implementation that provides on-demand filter/validation
+ * mechanism on an underlying DocIdSetIterator. See {@link
+ * FilteredDocIdSet}.
+ */
+
+public abstract class FilteredDocIdSetIterator extends DocIdSetIterator {
+ protected DocIdSetIterator _innerIter;
+ private int _currentDoc;
+
+ /**
+ * Constructor.
+ * @param innerIter Underlying DocIdSetIterator.
+ */
+ public FilteredDocIdSetIterator(DocIdSetIterator innerIter) {
+ if (innerIter == null) {
+ throw new IllegalArgumentException("null iterator");
+ }
+ _innerIter = innerIter;
+ _currentDoc = -1;
+ }
+
+ /**
+ * Validation method to determine whether a docid should be in the result set.
+ * @param docid docid to be tested
+ * @return true if input docid should be in the result set, false otherwise.
+ * @see #FilteredDocIdSetIterator(DocIdSetIterator).
+ */
+ abstract protected boolean match(int doc);
+
+ // @Override
+ public final int doc() {
+ return _currentDoc;
+ }
+
+ // @Override
+ public final boolean next() throws IOException{
+ while (_innerIter.next()) {
+ int doc = _innerIter.doc();
+ if (match(doc)) {
+ _currentDoc = doc;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // @Override
+ public final boolean skipTo(int n) throws IOException{
+ boolean flag = _innerIter.skipTo(n);
+ if (flag) {
+ int doc = _innerIter.doc();
+ if (match(doc)) {
+ _currentDoc = doc;
+ return true;
+ } else {
+ while (_innerIter.next()) {
+ int docid = _innerIter.doc();
+ if (match(docid)) {
+ _currentDoc = docid;
+ return true;
+ }
+ }
+ return false;
+ }
+ }
+ return flag;
+ }
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/src/test/org/apache/lucene/search/TestDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestDocIdSet.java?rev=740361&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestDocIdSet.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestDocIdSet.java Tue Feb 3 17:53:46 2009
@@ -0,0 +1,93 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestDocIdSet extends LuceneTestCase {
+ public void testFilteredDocIdSet() throws Exception {
+ final int maxdoc=10;
+ final DocIdSet innerSet = new DocIdSet() {
+
+ // @Override
+ public DocIdSetIterator iterator() {
+ return new DocIdSetIterator() {
+
+ int docid=-1;
+ //@Override
+ public int doc() {
+ return docid;
+ }
+
+ //@Override
+ public boolean next() throws IOException {
+ docid++;
+ return (docid<maxdoc);
+ }
+
+ //@Override
+ public boolean skipTo(int target) throws IOException {
+ do {
+ if (!next()) {
+ return false;
+ }
+ } while (target > doc());
+
+ return true;
+ }
+ };
+ }
+ };
+
+
+ DocIdSet filteredSet = new FilteredDocIdSet(innerSet){
+ // @Override
+ protected boolean match(int docid) {
+ return docid%2 == 0; //validate only even docids
+ }
+ };
+
+ DocIdSetIterator iter = filteredSet.iterator();
+ ArrayList/*<Integer>*/ list = new ArrayList/*<Integer>*/();
+ if (iter.skipTo(3)) {
+ list.add(new Integer(iter.doc()));
+ while(iter.next()) {
+ list.add(new Integer(iter.doc()));
+ }
+ }
+
+ int[] docs = new int[list.size()];
+ int c=0;
+ Iterator/*<Integer>*/ intIter = list.iterator();
+ while(intIter.hasNext()) {
+ docs[c++] = ((Integer) intIter.next()).intValue();
+ }
+ int[] answer = new int[]{4,6,8};
+ boolean same = Arrays.equals(answer, docs);
+ if (!same) {
+ System.out.println("answer: "+Arrays.toString(answer));
+ System.out.println("gotten: "+Arrays.toString(docs));
+ fail();
+ }
+ }
+}
Propchange: lucene/java/trunk/src/test/org/apache/lucene/search/TestDocIdSet.java
------------------------------------------------------------------------------
svn:eol-style = native