You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2023/03/27 09:10:53 UTC

[lucene] branch branch_9x updated: Fix ordered intervals query over interleaved terms (#12214)

This is an automated email from the ASF dual-hosted git repository.

romseygeek pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new e7648cf6f69 Fix ordered intervals query over interleaved terms (#12214)
e7648cf6f69 is described below

commit e7648cf6f69cfd9db5d0987625db76a4cef4db70
Author: Hongyu Yan <ya...@gmail.com>
AuthorDate: Mon Mar 27 16:18:33 2023 +0800

    Fix ordered intervals query over interleaved terms (#12214)
    
    Given the input text 'A B A C A B C' and the query ORDERED(A, B, C), we should
    retrieve hits [0,3] and [4,6]; currently [4,6] is skipped.
    
    After finding the first interval [0,3], the subintervals are A[0,0], B[1,1],
    C[3,3]. The algorithm then tries to minimize the interval, and the subintervals
    become A[2,2], B[5,5], C[3,3] (minimization stops on finding 5 > 3).
    
    Then, when finding the next interval, it calls advance(B) before checking whether
    B is after A (the do-while loop), so the subintervals become A[2,2], B[inf,inf],
    C[3,3] and NO_MORE_INTERVALS is returned.
    
    This commit instead continues advancing subintervals from where the last
    `nextInterval` call stopped, rather than always advancing all subintervals.
---
 lucene/CHANGES.txt                                         |  2 ++
 .../lucene/queries/intervals/OrderedIntervalsSource.java   |  3 +--
 .../apache/lucene/queries/intervals/TestIntervalQuery.java | 14 +++++++++++++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index d81634acea2..5957fb74e2f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -78,6 +78,8 @@ Bug Fixes
 
 * GITHUB#12202: Fix MultiFieldQueryParser to apply boosts to regexp, wildcard, prefix, range, fuzzy queries.  (Jasir KT)
 
+* GITHUB#12214: Fix ordered intervals query to avoid skipping some of the results over interleaved terms. (Hongyu Yan)
+
 Build
 ---------------------
 
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/intervals/OrderedIntervalsSource.java b/lucene/queries/src/java/org/apache/lucene/queries/intervals/OrderedIntervalsSource.java
index a7472862096..65fa6d03395 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/intervals/OrderedIntervalsSource.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/intervals/OrderedIntervalsSource.java
@@ -112,7 +112,7 @@ class OrderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
 
   private static class OrderedIntervalIterator extends ConjunctionIntervalIterator {
 
-    int start = -1, end = -1, i;
+    int start = -1, end = -1, i = 1;
     int slop;
     final MatchCallback onMatch;
 
@@ -136,7 +136,6 @@ class OrderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
       start = end = slop = IntervalIterator.NO_MORE_INTERVALS;
       int lastStart = Integer.MAX_VALUE;
       boolean minimizing = false;
-      i = 1;
       while (true) {
         while (true) {
           if (subIterators.get(i - 1).end() >= lastStart) {
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java
index 1275c1db6d4..d9141882c94 100644
--- a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java
@@ -84,7 +84,8 @@ public class TestIntervalQuery extends LuceneTestCase {
     "greater new york",
     "x x x x x intend x x x message x x x message x x x addressed x x",
     "issue with intervals queries from search engine. So it's a big issue for us as we need to do ordered searches. Thank you to help us concerning that issue",
-    "場外好朋友"
+    "場外好朋友",
+    "alice bob alice alice carl alice bob alice carl"
   };
 
   private void checkHits(Query query, int[] results) throws IOException {
@@ -348,6 +349,17 @@ public class TestIntervalQuery extends LuceneTestCase {
     checkHits(q, new int[] {});
   }
 
+  public void testOrderedWithGaps2() throws IOException {
+    Query q =
+        new IntervalQuery(
+            field,
+            Intervals.maxgaps(
+                1,
+                Intervals.ordered(
+                    Intervals.term("alice"), Intervals.term("bob"), Intervals.term("carl"))));
+    checkHits(q, new int[] {12});
+  }
+
   public void testNestedOrInContainedBy() throws IOException {
     Query q =
         new IntervalQuery(