You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/11 19:43:17 UTC

svn commit: r1466997 - in /lucene/dev/trunk/lucene: CHANGES.txt core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java

Author: rmuir
Date: Thu Apr 11 17:43:17 2013
New Revision: 1466997

URL: http://svn.apache.org/r1466997
Log:
LUCENE-4926: speed up disjunctionmaxscorer

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1466997&r1=1466996&r2=1466997&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Apr 11 17:43:17 2013
@@ -206,6 +206,8 @@ Optimizations
 * LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
   (Robert Muir)
 
+* LUCENE-4926: Speed up DisjunctionMatchQuery.  (Robert Muir)
+
 API Changes
 
 * LUCENE-4844: removed TaxonomyReader.getParent(), you should use

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java?rev=1466997&r1=1466996&r2=1466997&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java Thu Apr 11 17:43:17 2013
@@ -28,6 +28,7 @@ class DisjunctionMaxScorer extends Disju
   /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
   private final float tieBreakerMultiplier;
   private int doc = -1;
+  private int freq = -1;
 
   /* Used when scoring currently matching doc. */
   private float scoreSum;
@@ -55,8 +56,8 @@ class DisjunctionMaxScorer extends Disju
 
   @Override
   public int nextDoc() throws IOException {
-    if (numScorers == 0) return doc = NO_MORE_DOCS;
-    while (subScorers[0].docID() == doc) {
+    assert doc != NO_MORE_DOCS;
+    while(true) {
       if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
         heapAdjust(0);
       } else {
@@ -65,9 +66,11 @@ class DisjunctionMaxScorer extends Disju
           return doc = NO_MORE_DOCS;
         }
       }
+      if (subScorers[0].docID() != doc) {
+        afterNext();
+        return doc;
+      }
     }
-    
-    return doc = subScorers[0].docID();
   }
 
   @Override
@@ -80,47 +83,40 @@ class DisjunctionMaxScorer extends Disju
    */
   @Override
   public float score() throws IOException {
-    int doc = subScorers[0].docID();
-    scoreSum = scoreMax = subScorers[0].score();
-    int size = numScorers;
-    scoreAll(1, size, doc);
-    scoreAll(2, size, doc);
     return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
   }
+  
+  private void afterNext() throws IOException {
+    doc = subScorers[0].docID();
+    if (doc != NO_MORE_DOCS) {
+      scoreSum = scoreMax = subScorers[0].score();
+      freq = 1;
+      scoreAll(1);
+      scoreAll(2);
+    }
+  }
 
   // Recursively iterate all subScorers that generated last doc computing sum and max
-  private void scoreAll(int root, int size, int doc) throws IOException {
-    if (root < size && subScorers[root].docID() == doc) {
+  private void scoreAll(int root) throws IOException {
+    if (root < numScorers && subScorers[root].docID() == doc) {
       float sub = subScorers[root].score();
+      freq++;
       scoreSum += sub;
       scoreMax = Math.max(scoreMax, sub);
-      scoreAll((root<<1)+1, size, doc);
-      scoreAll((root<<1)+2, size, doc);
+      scoreAll((root<<1)+1);
+      scoreAll((root<<1)+2);
     }
   }
 
   @Override
   public int freq() throws IOException {
-    int doc = subScorers[0].docID();
-    int size = numScorers;
-    return 1 + freq(1, size, doc) + freq(2, size, doc);
-  }
-  
-  // Recursively iterate all subScorers that generated last doc computing sum and max
-  private int freq(int root, int size, int doc) throws IOException {
-    int freq = 0;
-    if (root < size && subScorers[root].docID() == doc) {
-      freq++;
-      freq += freq((root<<1)+1, size, doc);
-      freq += freq((root<<1)+2, size, doc);
-    }
     return freq;
   }
 
   @Override
   public int advance(int target) throws IOException {
-    if (numScorers == 0) return doc = NO_MORE_DOCS;
-    while (subScorers[0].docID() < target) {
+    assert doc != NO_MORE_DOCS;
+    while(true) {
       if (subScorers[0].advance(target) != NO_MORE_DOCS) {
         heapAdjust(0);
       } else {
@@ -129,7 +125,10 @@ class DisjunctionMaxScorer extends Disju
           return doc = NO_MORE_DOCS;
         }
       }
+      if (subScorers[0].docID() >= target) {
+        afterNext();
+        return doc;
+      }
     }
-    return doc = subScorers[0].docID();
   }
 }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java?rev=1466997&r1=1466996&r2=1466997&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java Thu Apr 11 17:43:17 2013
@@ -19,12 +19,16 @@ package org.apache.lucene.search;
 
 import org.apache.lucene.document.Field;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -32,6 +36,8 @@ import org.apache.lucene.index.StoredDoc
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
 
 import java.text.DecimalFormat;
@@ -470,6 +476,39 @@ public class TestDisjunctionMaxQuery ext
     }
   }
   
+  // LUCENE-4477 / LUCENE-4401:
+  public void testBooleanSpanQuery() throws Exception {
+    int hits = 0;
+    Directory directory = newDirectory();
+    Analyzer indexerAnalyzer = new MockAnalyzer(random());
+
+    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
+    IndexWriter writer = new IndexWriter(directory, config);
+    String FIELD = "content";
+    Document d = new Document();
+    d.add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
+    writer.addDocument(d);
+    writer.close();
+
+    IndexReader indexReader = DirectoryReader.open(directory);
+    IndexSearcher searcher = newSearcher(indexReader);
+
+    DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
+    SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
+    SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
+    query.add(sq1);
+    query.add(sq2);
+    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
+    searcher.search(query, collector);
+    hits = collector.topDocs().scoreDocs.length;
+    for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs){
+      System.out.println(scoreDoc.doc);
+    }
+    indexReader.close();
+    assertEquals(hits, 1);
+    directory.close();
+  }
+  
   /** macro */
   protected Query tq(String f, String t) {
     return new TermQuery(new Term(f, t));