You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/10/25 19:15:58 UTC
svn commit: r1188805 - in /lucene/dev/branches/branch_3x/lucene: CHANGES.txt
src/java/org/apache/lucene/search/FilteredQuery.java
src/java/org/apache/lucene/search/IndexSearcher.java
src/test/org/apache/lucene/search/TestFilteredQuery.java
Author: uschindler
Date: Tue Oct 25 17:15:57 2011
New Revision: 1188805
URL: http://svn.apache.org/viewvc?rev=1188805&view=rev
Log:
LUCENE-3534: Remove filter logic from IndexSearcher and delegate to FilteredQuery's Scorer. This is a partial backport of a cleanup in FilteredQuery/IndexSearcher added by LUCENE-1536 to Lucene 4.0
Modified:
lucene/dev/branches/branch_3x/lucene/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/FilteredQuery.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java
Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1188805&r1=1188804&r2=1188805&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Tue Oct 25 17:15:57 2011
@@ -94,6 +94,11 @@ Optimizations
* LUCENE-3494: Optimize FilteredQuery to remove a multiply in score()
(Uwe Schindler, Robert Muir)
+* LUCENE-3534: Remove filter logic from IndexSearcher and delegate to
+ FilteredQuery's Scorer. This is a partial backport of a cleanup in
+ FilteredQuery/IndexSearcher added by LUCENE-1536 to Lucene 4.0.
+ (Uwe Schindler)
+
Test Cases
* LUCENE-3420: Disable the finalness checks in TokenStream and Analyzer
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/FilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/FilteredQuery.java?rev=1188805&r1=1188804&r2=1188805&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/FilteredQuery.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/FilteredQuery.java Tue Oct 25 17:15:57 2011
@@ -70,6 +70,10 @@ extends Query {
public float getValue() { return value; }
@Override
+ public boolean scoresDocsOutOfOrder() {
+ return false;
+ }
+
public float sumOfSquaredWeights() throws IOException {
return weight.sumOfSquaredWeights() * getBoost() * getBoost(); // boost sub-weight
}
@@ -105,58 +109,106 @@ extends Query {
// return a filtering scorer
@Override
- public Scorer scorer(IndexReader indexReader, boolean scoreDocsInOrder, boolean topScorer)
- throws IOException {
- final Scorer scorer = weight.scorer(indexReader, true, false);
- if (scorer == null) {
- return null;
- }
- DocIdSet docIdSet = filter.getDocIdSet(indexReader);
- if (docIdSet == null) {
- return null;
- }
- final DocIdSetIterator docIdSetIterator = docIdSet.iterator();
- if (docIdSetIterator == null) {
- return null;
- }
-
- return new Scorer(similarity, this) {
+ public Scorer scorer(IndexReader indexReader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
+ // Hackidy-Häck-Hack for backwards compatibility, as we cannot change IndexSearcher API in 3.x, but still want
+ // to move the searchWithFilter implementation to this class: to enable access to our scorer() implementation
+ // from IndexSearcher, we moved this method up to the main class. In Lucene trunk,
+ // FilteredQuery#getFilteredScorer is inlined here - in 3.x we delegate:
+ return FilteredQuery.getFilteredScorer(indexReader, similarity, weight, this, filter);
+ }
+ };
+ }
+
+ /** Hackidy-Häck-Hack for backwards compatibility, as we cannot change IndexSearcher API in 3.x, but still want
+ * to move the searchWithFilter implementation to this class: to enable access to our scorer() implementation
+ * from IndexSearcher without instantiating a separate {@link Weight}, we make the inner implementation accessible.
+ * @param indexReader the atomic reader
+ * @param similarity the Similarity to use (deprecated)
+ * @param weight the weight to wrap
+ * @param wrapperWeight must be identical to {@code weight} for usage in {@link IndexSearcher}, but it is different inside this query
+ * @param filter the Filter to wrap
+ * @lucene.internal
+ */
+ static Scorer getFilteredScorer(final IndexReader indexReader, final Similarity similarity,
+ final Weight weight, final Weight wrapperWeight, final Filter filter) throws IOException {
+ assert filter != null;
+
+ final DocIdSet filterDocIdSet = filter.getDocIdSet(indexReader);
+ if (filterDocIdSet == null) {
+ // this means the filter does not accept any documents.
+ return null;
+ }
+
+ final DocIdSetIterator filterIter = filterDocIdSet.iterator();
+ if (filterIter == null) {
+ // this means the filter does not accept any documents.
+ return null;
+ }
- private int doc = -1;
-
- private int advanceToCommon(int scorerDoc, int disiDoc) throws IOException {
- while (scorerDoc != disiDoc) {
- if (scorerDoc < disiDoc) {
- scorerDoc = scorer.advance(disiDoc);
- } else {
- disiDoc = docIdSetIterator.advance(scorerDoc);
- }
+ // we are gonna advance() this scorer, so we set inorder=true/toplevel=false
+ final Scorer scorer = weight.scorer(indexReader, true, false);
+ return (scorer == null) ? null : new Scorer(similarity, wrapperWeight) {
+ private int scorerDoc = -1, filterDoc = -1;
+
+ // optimization: we are topScorer and collect directly using short-circuited algo
+ @Override
+ public void score(Collector collector) throws IOException {
+ int filterDoc = filterIter.nextDoc();
+ int scorerDoc = scorer.advance(filterDoc);
+ // the normalization trick already applies the boost of this query,
+ // so we can use the wrapped scorer directly:
+ collector.setScorer(scorer);
+ for (;;) {
+ if (scorerDoc == filterDoc) {
+ // Check if scorer has exhausted, only before collecting.
+ if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
}
+ collector.collect(scorerDoc);
+ filterDoc = filterIter.nextDoc();
+ scorerDoc = scorer.advance(filterDoc);
+ } else if (scorerDoc > filterDoc) {
+ filterDoc = filterIter.advance(scorerDoc);
+ } else {
+ scorerDoc = scorer.advance(filterDoc);
+ }
+ }
+ }
+
+ private int advanceToNextCommonDoc() throws IOException {
+ for (;;) {
+ if (scorerDoc < filterDoc) {
+ scorerDoc = scorer.advance(filterDoc);
+ } else if (scorerDoc == filterDoc) {
return scorerDoc;
+ } else {
+ filterDoc = filterIter.advance(scorerDoc);
}
+ }
+ }
- @Override
- public int nextDoc() throws IOException {
- int scorerDoc, disiDoc;
- return doc = (disiDoc = docIdSetIterator.nextDoc()) != NO_MORE_DOCS
- && (scorerDoc = scorer.nextDoc()) != NO_MORE_DOCS
- && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS ? scorer.docID() : NO_MORE_DOCS;
- }
-
- @Override
- public int docID() { return doc; }
-
- @Override
- public int advance(int target) throws IOException {
- int disiDoc, scorerDoc;
- return doc = (disiDoc = docIdSetIterator.advance(target)) != NO_MORE_DOCS
- && (scorerDoc = scorer.advance(disiDoc)) != NO_MORE_DOCS
- && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS ? scorer.docID() : NO_MORE_DOCS;
- }
+ @Override
+ public int nextDoc() throws IOException {
+ filterDoc = filterIter.nextDoc();
+ return advanceToNextCommonDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target > filterDoc) {
+ filterDoc = filterIter.advance(target);
+ }
+ return advanceToNextCommonDoc();
+ }
- @Override
- public float score() throws IOException { return scorer.score(); }
- };
+ @Override
+ public int docID() {
+ return scorerDoc;
+ }
+
+ @Override
+ public float score() throws IOException {
+ return scorer.score();
}
};
}
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/IndexSearcher.java?rev=1188805&r1=1188804&r2=1188805&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Tue Oct 25 17:15:57 2011
@@ -568,64 +568,13 @@ public class IndexSearcher extends Searc
// threaded...? the Collector could be sync'd?
// always use single thread:
- if (filter == null) {
- for (int i = 0; i < subReaders.length; i++) { // search each subreader
- collector.setNextReader(subReaders[i], docBase + docStarts[i]);
- Scorer scorer = weight.scorer(subReaders[i], !collector.acceptsDocsOutOfOrder(), true);
- if (scorer != null) {
- scorer.score(collector);
- }
- }
- } else {
- for (int i = 0; i < subReaders.length; i++) { // search each subreader
- collector.setNextReader(subReaders[i], docBase + docStarts[i]);
- searchWithFilter(subReaders[i], weight, filter, collector);
- }
- }
- }
-
- private void searchWithFilter(IndexReader reader, Weight weight,
- final Filter filter, final Collector collector) throws IOException {
-
- assert filter != null;
-
- Scorer scorer = weight.scorer(reader, true, false);
- if (scorer == null) {
- return;
- }
-
- int docID = scorer.docID();
- assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS;
-
- // CHECKME: use ConjunctionScorer here?
- DocIdSet filterDocIdSet = filter.getDocIdSet(reader);
- if (filterDocIdSet == null) {
- // this means the filter does not accept any documents.
- return;
- }
-
- DocIdSetIterator filterIter = filterDocIdSet.iterator();
- if (filterIter == null) {
- // this means the filter does not accept any documents.
- return;
- }
- int filterDoc = filterIter.nextDoc();
- int scorerDoc = scorer.advance(filterDoc);
-
- collector.setScorer(scorer);
- while (true) {
- if (scorerDoc == filterDoc) {
- // Check if scorer has exhausted, only before collecting.
- if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
- break;
- }
- collector.collect(scorerDoc);
- filterDoc = filterIter.nextDoc();
- scorerDoc = scorer.advance(filterDoc);
- } else if (scorerDoc > filterDoc) {
- filterDoc = filterIter.advance(scorerDoc);
- } else {
- scorerDoc = scorer.advance(filterDoc);
+ for (int i = 0; i < subReaders.length; i++) { // search each subreader
+ collector.setNextReader(subReaders[i], docBase + docStarts[i]);
+ final Scorer scorer = (filter == null) ?
+ weight.scorer(subReaders[i], !collector.acceptsDocsOutOfOrder(), true) :
+ FilteredQuery.getFilteredScorer(subReaders[i], getSimilarity(), weight, weight, filter);
+ if (scorer != null) {
+ scorer.score(collector);
}
}
}
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java?rev=1188805&r1=1188804&r2=1188805&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java Tue Oct 25 17:15:57 2011
@@ -81,6 +81,7 @@ public class TestFilteredQuery extends L
writer.close ();
searcher = newSearcher(reader);
+
query = new TermQuery (new Term ("field", "three"));
filter = newStaticFilterB();
}
@@ -105,10 +106,9 @@ public class TestFilteredQuery extends L
directory.close();
super.tearDown();
}
-
- public void testFilteredQuery()
- throws Exception {
- Query filteredquery = new FilteredQuery (query, filter);
+
+ public void testFilteredQuery() throws Exception {
+ Query filteredquery = new FilteredQuery(query, filter);
ScoreDoc[] hits = searcher.search (filteredquery, null, 1000).scoreDocs;
assertEquals (1, hits.length);
assertEquals (1, hits[0].doc);
@@ -195,31 +195,55 @@ public class TestFilteredQuery extends L
QueryUtils.check(random, filteredquery,searcher);
}
- public void testBoolean() throws Exception {
+ public void testBooleanMUST() throws Exception {
BooleanQuery bq = new BooleanQuery();
- Query query = new FilteredQuery(new MatchAllDocsQuery(),
- new SingleDocTestFilter(0));
+ Query query = new FilteredQuery(new MatchAllDocsQuery(), new SingleDocTestFilter(0));
bq.add(query, BooleanClause.Occur.MUST);
- query = new FilteredQuery(new MatchAllDocsQuery(),
- new SingleDocTestFilter(1));
+ query = new FilteredQuery(new MatchAllDocsQuery(), new SingleDocTestFilter(1));
bq.add(query, BooleanClause.Occur.MUST);
ScoreDoc[] hits = searcher.search(bq, null, 1000).scoreDocs;
assertEquals(0, hits.length);
QueryUtils.check(random, query,searcher);
}
+ public void testBooleanSHOULD() throws Exception {
+ BooleanQuery bq = new BooleanQuery();
+ Query query = new FilteredQuery(new MatchAllDocsQuery(), new SingleDocTestFilter(0));
+ bq.add(query, BooleanClause.Occur.SHOULD);
+ query = new FilteredQuery(new MatchAllDocsQuery(), new SingleDocTestFilter(1));
+ bq.add(query, BooleanClause.Occur.SHOULD);
+ ScoreDoc[] hits = searcher.search(bq, null, 1000).scoreDocs;
+ assertEquals(2, hits.length);
+ QueryUtils.check(random, query,searcher);
+ }
+
// Make sure BooleanQuery, which does out-of-order
// scoring, inside FilteredQuery, works
public void testBoolean2() throws Exception {
BooleanQuery bq = new BooleanQuery();
- Query query = new FilteredQuery(bq,
- new SingleDocTestFilter(0));
+ Query query = new FilteredQuery(bq, new SingleDocTestFilter(0));
bq.add(new TermQuery(new Term("field", "one")), BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.SHOULD);
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
QueryUtils.check(random, query,searcher);
}
+
+ public void testChainedFilters() throws Exception {
+ Query query = new FilteredQuery(new FilteredQuery(
+ new MatchAllDocsQuery(), new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "three"))))),
+ new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "four")))));
+ ScoreDoc[] hits = searcher.search(query, 10).scoreDocs;
+ assertEquals(2, hits.length);
+ QueryUtils.check(random, query, searcher);
+
+ // one more:
+ query = new FilteredQuery(query,
+ new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "five")))));
+ hits = searcher.search(query, 10).scoreDocs;
+ assertEquals(1, hits.length);
+ QueryUtils.check(random, query, searcher);
+ }
}