You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/12/30 18:17:27 UTC

svn commit: r1648547 - in /lucene/dev/trunk/lucene: ./ misc/src/java/org/apache/lucene/index/ misc/src/java/org/apache/lucene/search/ misc/src/test/org/apache/lucene/index/ misc/src/test/org/apache/lucene/search/ suggest/src/java/org/apache/lucene/sear...

Author: jpountz
Date: Tue Dec 30 17:17:27 2014
New Revision: 1648547

URL: http://svn.apache.org/r1648547
Log:
LUCENE-6145: Make EarlyTerminatingSortingCollector able to early-terminate when the sort order is a prefix of the index-time order.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java
    lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java
    lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java
    lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java
    lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1648547&r1=1648546&r2=1648547&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Dec 30 17:17:27 2014
@@ -186,6 +186,9 @@ Optimizations
 * LUCENE-6133: Improve default StoredFieldsWriter.merge() to be more efficient. 
   (Robert Muir)
 
+* LUCENE-6145: Make EarlyTerminatingSortingCollector able to early-terminate
+  when the sort order is a prefix of the index-time order. (Adrien Grand)
+
 API Changes
 
 * LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and

Modified: lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java?rev=1648547&r1=1648546&r2=1648547&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/index/SortingMergePolicy.java Tue Dec 30 17:17:27 2014
@@ -186,8 +186,9 @@ public final class SortingMergePolicy ex
 
   }
 
-  /** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */
-  public static boolean isSorted(LeafReader reader, Sort sort) {
+  /** Returns {@code true} if the given {@code reader} is sorted by the
+   *  {@code sort} order of this {@link SortingMergePolicy}. */
+  public boolean isSorted(LeafReader reader) {
     String description = getSortDescription(reader);
     if (description != null && description.equals(sort.toString())) {
       return true;
@@ -228,6 +229,11 @@ public final class SortingMergePolicy ex
     this.sort = sort;
   }
 
+  /** Return the {@link Sort} order that is used to sort segments when merging. */
+  public Sort getSort() {
+    return sort;
+  }
+
   @Override
   public MergeSpecification findMerges(MergeTrigger mergeTrigger,
       SegmentInfos segmentInfos, IndexWriter writer) throws IOException {

Modified: lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java?rev=1648547&r1=1648546&r2=1648547&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java (original)
+++ lucene/dev/trunk/lucene/misc/src/java/org/apache/lucene/search/EarlyTerminatingSortingCollector.java Tue Dec 30 17:17:27 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.IndexWriter;
@@ -66,10 +67,24 @@ import org.apache.lucene.search.TotalHit
  */
 public class EarlyTerminatingSortingCollector extends FilterCollector {
 
+  /** Returns whether collection can be early-terminated if it sorts with the
+   *  provided {@link Sort} and if segments are merged with the provided
+   *  {@link SortingMergePolicy}. */
+  public static boolean canEarlyTerminate(Sort sort, SortingMergePolicy mergePolicy) {
+    final SortField[] fields1 = sort.getSort();
+    final SortField[] fields2 = mergePolicy.getSort().getSort();
+    // early termination is possible if fields1 is a prefix of fields2
+    if (fields1.length > fields2.length) {
+      return false;
+    }
+    return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length));
+  }
+
   /** Sort used to sort the search results */
   protected final Sort sort;
   /** Number of documents to collect in each segment */
   protected final int numDocsToCollect;
+  private final SortingMergePolicy mergePolicy;
 
   /**
    * Create a new {@link EarlyTerminatingSortingCollector} instance.
@@ -82,19 +97,25 @@ public class EarlyTerminatingSortingColl
    *          the number of documents to collect on each segment. When wrapping
    *          a {@link TopDocsCollector}, this number should be the number of
    *          hits.
+   * @throws IllegalArgumentException if the sort order doesn't allow for early
+   *          termination with the given merge policy.
    */
-  public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
+  public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, SortingMergePolicy mergePolicy) {
     super(in);
     if (numDocsToCollect <= 0) {
-      throw new IllegalStateException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
+      throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
+    }
+    if (canEarlyTerminate(sort, mergePolicy) == false) {
+      throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicy.getSort());
     }
     this.sort = sort;
     this.numDocsToCollect = numDocsToCollect;
+    this.mergePolicy = mergePolicy;
   }
 
   @Override
   public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
-    if (SortingMergePolicy.isSorted(context.reader(), sort)) {
+    if (mergePolicy.isSorted(context.reader())) {
       // segment is sorted, can early-terminate
       return new FilterLeafCollector(super.getLeafCollector(context)) {
         private int numCollected;

Modified: lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java?rev=1648547&r1=1648546&r2=1648547&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/TestSortingMergePolicy.java Tue Dec 30 17:17:27 2014
@@ -70,7 +70,7 @@ public class TestSortingMergePolicy exte
     return doc;
   }
 
-  public static MergePolicy newSortingMergePolicy(Sort sort) {
+  public static SortingMergePolicy newSortingMergePolicy(Sort sort) {
     // usually create a MP with a low merge factor so that many merges happen
     MergePolicy mp;
     int thingToDo = random().nextInt(3);

Modified: lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java?rev=1648547&r1=1648546&r2=1648547&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java (original)
+++ lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/search/TestEarlyTerminatingSortingCollector.java Tue Dec 30 17:17:27 2014
@@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SerialMergeScheduler;
+import org.apache.lucene.index.SortingMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TestSortingMergePolicy;
 import org.apache.lucene.search.LeafCollector;
@@ -59,6 +60,7 @@ public class TestEarlyTerminatingSorting
   private Sort sort;
   private RandomIndexWriter iw;
   private IndexReader reader;
+  private SortingMergePolicy mergePolicy;
 
   @Override
   public void setUp() throws Exception {
@@ -86,7 +88,8 @@ public class TestEarlyTerminatingSorting
     final long seed = random().nextLong();
     final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
     iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
-    iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sort));
+    mergePolicy = TestSortingMergePolicy.newSortingMergePolicy(sort);
+    iwc.setMergePolicy(mergePolicy);
     iw = new RandomIndexWriter(new Random(seed), dir, iwc);
     iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
     for (int i = 0; i < numDocs; ++i) {
@@ -134,7 +137,7 @@ public class TestEarlyTerminatingSorting
           query = new MatchAllDocsQuery();
         }
         searcher.search(query, collector1);
-        searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
+        searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy));
         assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
         assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
       }
@@ -142,6 +145,40 @@ public class TestEarlyTerminatingSorting
     }
   }
   
+  public void testCanEarlyTerminate() {
+    assertTrue(canEarlyTerminate(
+        new Sort(new SortField("a", SortField.Type.LONG)),
+        new Sort(new SortField("a", SortField.Type.LONG))));
+
+    assertTrue(canEarlyTerminate(
+        new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
+        new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
+
+    assertTrue(canEarlyTerminate(
+        new Sort(new SortField("a", SortField.Type.LONG)),
+        new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
+
+    assertFalse(canEarlyTerminate(
+        new Sort(new SortField("a", SortField.Type.LONG, true)),
+        new Sort(new SortField("a", SortField.Type.LONG, false))));
+
+    assertFalse(canEarlyTerminate(
+        new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
+        new Sort(new SortField("a", SortField.Type.LONG))));
+
+    assertFalse(canEarlyTerminate(
+        new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
+        new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING))));
+
+    assertFalse(canEarlyTerminate(
+        new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
+        new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
+  }
+
+  private boolean canEarlyTerminate(Sort querySort, Sort mergeSort) {
+    return EarlyTerminatingSortingCollector.canEarlyTerminate(querySort, new SortingMergePolicy(newMergePolicy(), mergeSort));
+  }
+
   public void testEarlyTerminationDifferentSorter() throws IOException {
     createRandomIndex();
     final int iters = atLeast(3);
@@ -166,7 +203,7 @@ public class TestEarlyTerminatingSorting
       }
       searcher.search(query, collector1);
       Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
-      searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits) {
+      searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, new SortingMergePolicy(newMergePolicy(), different)) {
         @Override
         public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
           final LeafCollector ret = super.getLeafCollector(context);

Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1648547&r1=1648546&r2=1648547&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Tue Dec 30 17:17:27 2014
@@ -51,6 +51,7 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.SegmentReader;
@@ -511,7 +512,8 @@ public class AnalyzingInfixSuggester ext
 
     // We sorted postings by weight during indexing, so we
     // only retrieve the first num hits now:
-    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
+    final MergePolicy mergePolicy = writer.getConfig().getMergePolicy();
+    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, (SortingMergePolicy) mergePolicy);
     IndexSearcher searcher = searcherMgr.acquire();
     List<LookupResult> results = null;
     try {