You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by bu...@apache.org on 2007/05/28 21:33:11 UTC

svn commit: r542303 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/

Author: buschmi
Date: Mon May 28 12:33:10 2007
New Revision: 542303

URL: http://svn.apache.org/viewvc?view=rev&rev=542303
Log:
LUCENE-730: Make scoring in docid order the default and add setAllowDocsOutOfOrder() and getAllowDocsOutOfOrder() to BooleanQuery.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java
    lucene/java/trunk/src/java/org/apache/lucene/search/BooleanScorer2.java
    lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestBoolean2.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=542303&r1=542302&r2=542303
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon May 28 12:33:10 2007
@@ -56,6 +56,15 @@
     argument, available as tokenStreamValue(). This is useful to avoid the need of 
     "dummy analyzers" for pre-analyzed fields. (Karl Wettin, Michael Busch)
 
+11. LUCENE-730: Added the new methods to BooleanQuery setAllowDocsOutOfOrder() and
+    getAllowDocsOutOfOrder(). Deprecated the methods setUseScorer14() and 
+    getUseScorer14(). The optimization patch LUCENE-730 (see Optimizations->3.) 
+    improves performance for certain queries but results in scoring out of docid 
+    order. This patch reverse this change, so now by default hit docs are scored
+    in docid order if not setAllowDocsOutOfOrder(true) is explicitly called.
+    This patch also enables the tests in QueryUtils again that check for docid
+    order. (Paul Elschot, Doron Cohen, Michael Busch)
+
 
 Bug fixes
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java?view=diff&rev=542303&r1=542302&r2=542303
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/BooleanQuery.java Mon May 28 12:33:10 2007
@@ -219,39 +219,13 @@
       }
     }
 
-    /** @return A good old 1.4 Scorer */
+    /** @return Returns BooleanScorer2 that uses and provides skipTo(),
+     *          and scores documents in document number order.
+     */
     public Scorer scorer(IndexReader reader) throws IOException {
-      // First see if the (faster) ConjunctionScorer will work.  This can be
-      // used when all clauses are required.  Also, at this point a
-      // BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
-      // from a BooleanScorer are not always sorted by document number (sigh)
-      // and hence BooleanScorer cannot implement skipTo() correctly, which is
-      // required by ConjunctionScorer.
-      boolean allRequired = true;
-      boolean noneBoolean = true;
-      for (int i = 0 ; i < weights.size(); i++) {
-        BooleanClause c = (BooleanClause)clauses.get(i);
-        if (!c.isRequired())
-          allRequired = false;
-        if (c.getQuery() instanceof BooleanQuery)
-          noneBoolean = false;
-      }
-
-      if (allRequired && noneBoolean) {           // ConjunctionScorer is okay
-        ConjunctionScorer result =
-          new ConjunctionScorer(similarity);
-        for (int i = 0 ; i < weights.size(); i++) {
-          Weight w = (Weight)weights.elementAt(i);
-          Scorer subScorer = w.scorer(reader);
-          if (subScorer == null)
-            return null;
-          result.add(subScorer);
-        }
-        return result;
-      }
-
-      // Use good-old BooleanScorer instead.
-      BooleanScorer result = new BooleanScorer(similarity);
+      BooleanScorer2 result = new BooleanScorer2(similarity,
+                                                 minNrShouldMatch,
+                                                 allowDocsOutOfOrder);
 
       for (int i = 0 ; i < weights.size(); i++) {
         BooleanClause c = (BooleanClause)clauses.get(i);
@@ -335,54 +309,48 @@
     }
   }
 
-  private class BooleanWeight2 extends BooleanWeight {
-    /* Merge into BooleanWeight in case the 1.4 BooleanScorer is dropped */
-    public BooleanWeight2(Searcher searcher)
-      throws IOException {
-        super(searcher);
-    }
-
-    /** @return An alternative Scorer that uses and provides skipTo(),
-     *          and scores documents in document number order.
-     */
-    public Scorer scorer(IndexReader reader) throws IOException {
-      BooleanScorer2 result = new BooleanScorer2(similarity,
-                                                 minNrShouldMatch);
-
-      for (int i = 0 ; i < weights.size(); i++) {
-        BooleanClause c = (BooleanClause)clauses.get(i);
-        Weight w = (Weight)weights.elementAt(i);
-        Scorer subScorer = w.scorer(reader);
-        if (subScorer != null)
-          result.add(subScorer, c.isRequired(), c.isProhibited());
-        else if (c.isRequired())
-          return null;
-      }
-
-      return result;
-    }
-  }
-
-  /** Indicates whether to use good old 1.4 BooleanScorer. */
-  private static boolean useScorer14 = false;
+  /** Whether hit docs may be collected out of docid order. */
+  private static boolean allowDocsOutOfOrder = false;
 
+  /**
+   * Indicates whether hit docs may be collected out of docid
+   * order. In other words, with this setting, 
+   * {@link HitCollector#collect(int,float)} might be
+   * invoked first for docid N and only later for docid N-1.
+   * Being static, this setting is system wide.
+   * If docs out of order are allowed scoring might be faster
+   * for certain queries (disjunction queries with less than
+   * 32 prohibited terms). This setting has no effect for 
+   * other queries.
+   */
+  public static void setAllowDocsOutOfOrder(boolean allow) {
+    allowDocsOutOfOrder = allow;
+  }  
+  
+  /**
+   * Whether hit docs may be collected out of docid order.
+   * @see #setAllowDocsOutOfOrder(boolean)
+   */
+  public static boolean getAllowDocsOutOfOrder() {
+    return allowDocsOutOfOrder;
+  }  
+
+  /**
+   * @deprecated Use {@link #setAllowDocsOutOfOrder(boolean)} instead.
+   */
   public static void setUseScorer14(boolean use14) {
-    useScorer14 = use14;
+    setAllowDocsOutOfOrder(use14);
   }
-
+  
+  /**
+   * @deprecated Use {@link #getAllowDocsOutOfOrder()} instead.
+   */
   public static boolean getUseScorer14() {
-    return useScorer14;
+    return getAllowDocsOutOfOrder();
   }
 
   protected Weight createWeight(Searcher searcher) throws IOException {
-
-    if (0 < minNrShouldMatch) {
-      // :TODO: should we throw an exception if getUseScorer14 ?
-      return new BooleanWeight2(searcher);
-    }
-
-    return getUseScorer14() ? (Weight) new BooleanWeight(searcher)
-                            : (Weight) new BooleanWeight2(searcher);
+    return new BooleanWeight(searcher);
   }
 
   public Query rewrite(IndexReader reader) throws IOException {

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/BooleanScorer2.java?view=diff&rev=542303&r1=542302&r2=542303
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/BooleanScorer2.java Mon May 28 12:33:10 2007
@@ -32,7 +32,6 @@
   private ArrayList optionalScorers = new ArrayList();
   private ArrayList prohibitedScorers = new ArrayList();
 
-
   private class Coordinator {
     int maxCoord = 0; // to be increased for each non prohibited scorer
     
@@ -66,6 +65,12 @@
 
   /** The number of optionalScorers that need to match (if there are any) */
   private final int minNrShouldMatch;
+  
+  /** Whether it is allowed to return documents out of order.
+   *  This can accelerate the scoring of disjunction queries.  
+   */  
+  private boolean allowDocsOutOfOrder;
+
 
   /** Create a BooleanScorer2.
    * @param similarity The similarity to be used.
@@ -74,23 +79,40 @@
    *                         In case no required scorers are added,
    *                         at least one of the optional scorers will have to
    *                         match during the search.
+   * @param allowDocsOutOfOrder Whether it is allowed to return documents out of order.
+   *                            This can accelerate the scoring of disjunction queries.                         
    */
-  public BooleanScorer2(Similarity similarity, int minNrShouldMatch) {
+  public BooleanScorer2(Similarity similarity, int minNrShouldMatch, boolean allowDocsOutOfOrder) {
     super(similarity);
     if (minNrShouldMatch < 0) {
       throw new IllegalArgumentException("Minimum number of optional scorers should not be negative");
     }
     coordinator = new Coordinator();
     this.minNrShouldMatch = minNrShouldMatch;
+    this.allowDocsOutOfOrder = allowDocsOutOfOrder;
   }
 
   /** Create a BooleanScorer2.
    *  In no required scorers are added,
    *  at least one of the optional scorers will have to match during the search.
    * @param similarity The similarity to be used.
+   * @param minNrShouldMatch The minimum number of optional added scorers
+   *                         that should match during the search.
+   *                         In case no required scorers are added,
+   *                         at least one of the optional scorers will have to
+   *                         match during the search.
+   */
+  public BooleanScorer2(Similarity similarity, int minNrShouldMatch) {
+    this(similarity, minNrShouldMatch, false);
+  }
+  
+  /** Create a BooleanScorer2.
+   *  In no required scorers are added,
+   *  at least one of the optional scorers will have to match during the search.
+   * @param similarity The similarity to be used.
    */
   public BooleanScorer2(Similarity similarity) {
-    this(similarity, 0);
+    this(similarity, 0, false);
   }
 
   public void add(final Scorer scorer, boolean required, boolean prohibited) {
@@ -285,8 +307,8 @@
    * <br>When this method is used the {@link #explain(int)} method should not be used.
    */
   public void score(HitCollector hc) throws IOException {
-    if ((requiredScorers.size() == 0) &&
-        prohibitedScorers.size() < 32) {
+    if (allowDocsOutOfOrder && requiredScorers.size() == 0
+            && prohibitedScorers.size() < 32) {
       // fall back to BooleanScorer, scores documents somewhat out of order
       BooleanScorer bs = new BooleanScorer(getSimilarity(), minNrShouldMatch);
       Iterator si = optionalScorers.iterator();
@@ -372,4 +394,5 @@
   */
   }
 }
+
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java?view=diff&rev=542303&r1=542302&r2=542303
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java Mon May 28 12:33:10 2007
@@ -85,7 +85,7 @@
   public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException {
     //System.out.println("Checking "+q);
    
-    if (BooleanQuery.getUseScorer14()) return;  // 1.4 doesn't support skipTo
+    if (BooleanQuery.getAllowDocsOutOfOrder()) return;  // 1.4 doesn't support skipTo
 
     final int skip_op = 0;
     final int next_op = 1;
@@ -106,10 +106,6 @@
       final Weight w = q.weight(s);
       final Scorer scorer = w.scorer(s.getIndexReader());
       
-      if (scorer instanceof BooleanScorer || scorer instanceof BooleanScorer2) {
-        return; // TODO change this if BooleanScorers would once again guarantee docs in order. 
-      }
-
       // FUTURE: ensure scorer.doc()==-1
 
       final int[] sdoc = new int[] {-1};

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestBoolean2.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestBoolean2.java?view=diff&rev=542303&r1=542302&r2=542303
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestBoolean2.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestBoolean2.java Mon May 28 12:33:10 2007
@@ -72,16 +72,16 @@
 //System.out.println("Query: " + queryText);
     try {
       Query query1 = makeQuery(queryText);
-      BooleanQuery.setUseScorer14(true);
+      BooleanQuery.setAllowDocsOutOfOrder(true);
       Hits hits1 = searcher.search(query1);
 
       Query query2 = makeQuery(queryText); // there should be no need to parse again...
-      BooleanQuery.setUseScorer14(false);
+      BooleanQuery.setAllowDocsOutOfOrder(false);
       Hits hits2 = searcher.search(query2);
 
       CheckHits.checkHitsQuery(query2, hits1, hits2, expDocNrs);
     } finally { // even when a test fails.
-      BooleanQuery.setUseScorer14(false);
+      BooleanQuery.setAllowDocsOutOfOrder(false);
     }
   }
 
@@ -168,14 +168,14 @@
         // match up.
         Sort sort = Sort.INDEXORDER;
 
-        BooleanQuery.setUseScorer14(false);
+        BooleanQuery.setAllowDocsOutOfOrder(false);
 
         QueryUtils.check(q1,searcher);
 
         Hits hits1 = searcher.search(q1,sort);
         if (hits1.length()>0) hits1.id(hits1.length()-1);
 
-        BooleanQuery.setUseScorer14(true);
+        BooleanQuery.setAllowDocsOutOfOrder(true);
         Hits hits2 = searcher.search(q1,sort);
         if (hits2.length()>0) hits2.id(hits1.length()-1);
         tot+=hits2.length();
@@ -183,7 +183,7 @@
       }
 
     } finally { // even when a test fails.
-      BooleanQuery.setUseScorer14(false);
+      BooleanQuery.setAllowDocsOutOfOrder(false);
     }
 
     // System.out.println("Total hits:"+tot);