You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/11 19:59:54 UTC
svn commit: r1467014 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/CHANGES.txt lucene/core/
lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
Author: rmuir
Date: Thu Apr 11 17:59:53 2013
New Revision: 1467014
URL: http://svn.apache.org/r1467014
Log:
LUCENE-4926: speed up disjunctionmaxscorer
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1467014&r1=1467013&r2=1467014&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Apr 11 17:59:53 2013
@@ -179,6 +179,8 @@ Optimizations
* LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
(Robert Muir)
+* LUCENE-4926: Speed up DisjunctionMatchQuery. (Robert Muir)
+
API Changes
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java?rev=1467014&r1=1467013&r2=1467014&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java Thu Apr 11 17:59:53 2013
@@ -28,6 +28,7 @@ class DisjunctionMaxScorer extends Disju
/* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
private final float tieBreakerMultiplier;
private int doc = -1;
+ private int freq = -1;
/* Used when scoring currently matching doc. */
private float scoreSum;
@@ -55,8 +56,8 @@ class DisjunctionMaxScorer extends Disju
@Override
public int nextDoc() throws IOException {
- if (numScorers == 0) return doc = NO_MORE_DOCS;
- while (subScorers[0].docID() == doc) {
+ assert doc != NO_MORE_DOCS;
+ while(true) {
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
heapAdjust(0);
} else {
@@ -65,9 +66,11 @@ class DisjunctionMaxScorer extends Disju
return doc = NO_MORE_DOCS;
}
}
+ if (subScorers[0].docID() != doc) {
+ afterNext();
+ return doc;
+ }
}
-
- return doc = subScorers[0].docID();
}
@Override
@@ -80,47 +83,40 @@ class DisjunctionMaxScorer extends Disju
*/
@Override
public float score() throws IOException {
- int doc = subScorers[0].docID();
- scoreSum = scoreMax = subScorers[0].score();
- int size = numScorers;
- scoreAll(1, size, doc);
- scoreAll(2, size, doc);
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
}
+
+ private void afterNext() throws IOException {
+ doc = subScorers[0].docID();
+ if (doc != NO_MORE_DOCS) {
+ scoreSum = scoreMax = subScorers[0].score();
+ freq = 1;
+ scoreAll(1);
+ scoreAll(2);
+ }
+ }
// Recursively iterate all subScorers that generated last doc computing sum and max
- private void scoreAll(int root, int size, int doc) throws IOException {
- if (root < size && subScorers[root].docID() == doc) {
+ private void scoreAll(int root) throws IOException {
+ if (root < numScorers && subScorers[root].docID() == doc) {
float sub = subScorers[root].score();
+ freq++;
scoreSum += sub;
scoreMax = Math.max(scoreMax, sub);
- scoreAll((root<<1)+1, size, doc);
- scoreAll((root<<1)+2, size, doc);
+ scoreAll((root<<1)+1);
+ scoreAll((root<<1)+2);
}
}
@Override
public int freq() throws IOException {
- int doc = subScorers[0].docID();
- int size = numScorers;
- return 1 + freq(1, size, doc) + freq(2, size, doc);
- }
-
- // Recursively iterate all subScorers that generated last doc computing sum and max
- private int freq(int root, int size, int doc) throws IOException {
- int freq = 0;
- if (root < size && subScorers[root].docID() == doc) {
- freq++;
- freq += freq((root<<1)+1, size, doc);
- freq += freq((root<<1)+2, size, doc);
- }
return freq;
}
@Override
public int advance(int target) throws IOException {
- if (numScorers == 0) return doc = NO_MORE_DOCS;
- while (subScorers[0].docID() < target) {
+ assert doc != NO_MORE_DOCS;
+ while(true) {
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
heapAdjust(0);
} else {
@@ -129,7 +125,10 @@ class DisjunctionMaxScorer extends Disju
return doc = NO_MORE_DOCS;
}
}
+ if (subScorers[0].docID() >= target) {
+ afterNext();
+ return doc;
+ }
}
- return doc = subScorers[0].docID();
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java?rev=1467014&r1=1467013&r2=1467014&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java Thu Apr 11 17:59:53 2013
@@ -19,18 +19,24 @@ package org.apache.lucene.search;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import java.text.DecimalFormat;
@@ -469,6 +475,39 @@ public class TestDisjunctionMaxQuery ext
}
}
+ // LUCENE-4477 / LUCENE-4401:
+ public void testBooleanSpanQuery() throws Exception {
+ int hits = 0;
+ Directory directory = newDirectory();
+ Analyzer indexerAnalyzer = new MockAnalyzer(random());
+
+ IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
+ IndexWriter writer = new IndexWriter(directory, config);
+ String FIELD = "content";
+ Document d = new Document();
+ d.add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
+ writer.addDocument(d);
+ writer.close();
+
+ IndexReader indexReader = DirectoryReader.open(directory);
+ IndexSearcher searcher = newSearcher(indexReader);
+
+ DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
+ SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
+ SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
+ query.add(sq1);
+ query.add(sq2);
+ TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
+ searcher.search(query, collector);
+ hits = collector.topDocs().scoreDocs.length;
+ for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs){
+ System.out.println(scoreDoc.doc);
+ }
+ indexReader.close();
+ assertEquals(hits, 1);
+ directory.close();
+ }
+
/** macro */
protected Query tq(String f, String t) {
return new TermQuery(new Term(f, t));