You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2019/06/07 14:07:45 UTC

[lucene-solr] branch master updated: LUCENE-8828: Make unorderedNoOverlaps a separate IntervalsSource

This is an automated email from the ASF dual-hosted git repository.

romseygeek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 67677d9  LUCENE-8828: Make unorderedNoOverlaps a separate IntervalsSource
67677d9 is described below

commit 67677d995e8fca6214844b0eb814d06138bce0a2
Author: Alan Woodward <ro...@apache.org>
AuthorDate: Fri Jun 7 14:45:56 2019 +0100

    LUCENE-8828: Make unorderedNoOverlaps a separate IntervalsSource
---
 lucene/CHANGES.txt                                 |  3 +
 .../intervals/DisjunctionIntervalsSource.java      | 18 +++++-
 .../apache/lucene/search/intervals/Intervals.java  | 21 ++-----
 .../NoRewriteDisjunctionIntervalsSource.java       | 33 -----------
 .../search/intervals/UnorderedIntervalsSource.java | 68 ++++------------------
 .../lucene/search/intervals/TestIntervalQuery.java | 16 ++++-
 .../lucene/search/intervals/TestIntervals.java     | 20 +++++--
 .../search/intervals/TestSimplifications.java      | 11 +++-
 8 files changed, 75 insertions(+), 115 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index cb27213..b476b73 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,9 @@ Bug Fixes
 * LUCENE-8804: Forbid calls to putAttribute on frozen FieldType instances.
   (Vamshi Vijay Nakkirtha via Adrien Grand)
 
+* LUCENE-8828: Removes the buggy 'disallow overlaps' boolean from Intervals.unordered(),
+  and replaces it with a new Intervals.unorderedNoOverlaps() method (Alan Woodward)
+
 Improvements
 
 * LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java
index 350a29d..5e81d99 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java
@@ -20,6 +20,7 @@ package org.apache.lucene.search.intervals;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
@@ -38,9 +39,19 @@ import org.apache.lucene.util.PriorityQueue;
 class DisjunctionIntervalsSource extends IntervalsSource {
 
   final Collection<IntervalsSource> subSources;
+  final boolean pullUpDisjunctions;
 
-  public DisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
+  static IntervalsSource create(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
+    subSources = simplify(subSources);
+    if (subSources.size() == 1) {
+      return subSources.iterator().next();
+    }
+    return new DisjunctionIntervalsSource(subSources, pullUpDisjunctions);
+  }
+
+  private DisjunctionIntervalsSource(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
     this.subSources = simplify(subSources);
+    this.pullUpDisjunctions = pullUpDisjunctions;
   }
 
   private static Collection<IntervalsSource> simplify(Collection<IntervalsSource> sources) {
@@ -120,7 +131,10 @@ class DisjunctionIntervalsSource extends IntervalsSource {
 
   @Override
   public Collection<IntervalsSource> pullUpDisjunctions() {
-    return subSources;
+    if (pullUpDisjunctions) {
+      return subSources;
+    }
+    return Collections.singletonList(this);
   }
 
   static class DisjunctionIntervalIterator extends IntervalIterator {
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
index 591fbb1..d579c6f 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
@@ -138,13 +138,7 @@ public final class Intervals {
    * @param subSources   the sources to combine
    */
   public static IntervalsSource or(boolean rewrite, List<IntervalsSource> subSources) {
-    if (subSources.size() == 1) {
-      return subSources.get(0);
-    }
-    if (rewrite) {
-      return new DisjunctionIntervalsSource(subSources);
-    }
-    return new NoRewriteDisjunctionIntervalsSource(subSources);
+    return DisjunctionIntervalsSource.create(subSources, rewrite);
   }
 
   /**
@@ -227,19 +221,16 @@ public final class Intervals {
    * @param subSources  an unordered set of {@link IntervalsSource}s
    */
   public static IntervalsSource unordered(IntervalsSource... subSources) {
-    return unordered(true, subSources);
+    return UnorderedIntervalsSource.build(Arrays.asList(subSources));
   }
 
   /**
-   * Create an unordered {@link IntervalsSource}
+   * Create an unordered {@link IntervalsSource} allowing no overlaps between subsources
    *
-   * Returns intervals in which all the subsources appear.
-   *
-   * @param subSources  an unordered set of {@link IntervalsSource}s
-   * @param allowOverlaps whether or not the sources should be allowed to overlap in a hit
+   * Returns intervals in which both the subsources appear and do not overlap.
    */
-  public static IntervalsSource unordered(boolean allowOverlaps, IntervalsSource... subSources) {
-    return UnorderedIntervalsSource.build(Arrays.asList(subSources), allowOverlaps);
+  public static IntervalsSource unorderedNoOverlaps(IntervalsSource a, IntervalsSource b) {
+    return Intervals.or(Intervals.ordered(a, b), Intervals.ordered(b, a));
   }
 
   /**
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java
deleted file mode 100644
index e273f19..0000000
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.search.intervals;
-
-import java.util.Collection;
-import java.util.Collections;
-
-class NoRewriteDisjunctionIntervalsSource extends DisjunctionIntervalsSource {
-
-  public NoRewriteDisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
-    super(subSources);
-  }
-
-  @Override
-  public Collection<IntervalsSource> pullUpDisjunctions() {
-    return Collections.singletonList(this);
-  }
-}
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java
index adf9cc0..aaf0184 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java
@@ -29,17 +29,17 @@ import org.apache.lucene.util.PriorityQueue;
 
 class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
 
-  static IntervalsSource build(List<IntervalsSource> sources, boolean allowOverlaps) {
+  static IntervalsSource build(List<IntervalsSource> sources) {
     if (sources.size() == 1) {
       return sources.get(0);
     }
-    return new UnorderedIntervalsSource(flatten(sources, allowOverlaps), allowOverlaps);
+    return new UnorderedIntervalsSource(flatten(sources));
   }
 
-  private static List<IntervalsSource> flatten(List<IntervalsSource> sources, boolean allowOverlaps) {
+  private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
     List<IntervalsSource> flattened = new ArrayList<>();
     for (IntervalsSource s : sources) {
-      if (s instanceof UnorderedIntervalsSource && ((UnorderedIntervalsSource)s).allowOverlaps == allowOverlaps) {
+      if (s instanceof UnorderedIntervalsSource) {
         flattened.addAll(((UnorderedIntervalsSource)s).subSources);
       }
       else {
@@ -49,16 +49,13 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
     return flattened;
   }
 
-  private final boolean allowOverlaps;
-
-  private UnorderedIntervalsSource(List<IntervalsSource> sources, boolean allowOverlaps) {
+  private UnorderedIntervalsSource(List<IntervalsSource> sources) {
     super(sources, true);
-    this.allowOverlaps = allowOverlaps;
   }
 
   @Override
   protected IntervalIterator combine(List<IntervalIterator> iterators) {
-    return new UnorderedIntervalIterator(iterators, allowOverlaps);
+    return new UnorderedIntervalIterator(iterators);
   }
 
   @Override
@@ -72,25 +69,24 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
 
   @Override
   public Collection<IntervalsSource> pullUpDisjunctions() {
-    return Disjunctions.pullUp(subSources, ss -> new UnorderedIntervalsSource(ss, allowOverlaps));
+    return Disjunctions.pullUp(subSources, UnorderedIntervalsSource::new);
   }
 
   @Override
   public int hashCode() {
-    return Objects.hash(this.subSources, this.allowOverlaps);
+    return Objects.hash(this.subSources);
   }
 
   @Override
   public boolean equals(Object other) {
     if (other instanceof UnorderedIntervalsSource == false) return false;
     UnorderedIntervalsSource o = (UnorderedIntervalsSource) other;
-    return Objects.equals(this.subSources, o.subSources) &&
-        Objects.equals(this.allowOverlaps, o.allowOverlaps);
+    return Objects.equals(this.subSources, o.subSources);
   }
 
   @Override
   public String toString() {
-    return (allowOverlaps ? "UNORDERED(" : "UNORDERED_NO_OVERLAPS(") +
+    return "UNORDERED(" +
         subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")";
   }
 
@@ -99,11 +95,10 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
     private final PriorityQueue<IntervalIterator> queue;
     private final IntervalIterator[] subIterators;
     private final int[] innerPositions;
-    private final boolean allowOverlaps;
 
     int start = -1, end = -1, firstEnd, queueEnd;
 
-    UnorderedIntervalIterator(List<IntervalIterator> subIterators, boolean allowOverlaps) {
+    UnorderedIntervalIterator(List<IntervalIterator> subIterators) {
       super(subIterators);
       this.queue = new PriorityQueue<IntervalIterator>(subIterators.size()) {
         @Override
@@ -113,7 +108,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
       };
       this.subIterators = new IntervalIterator[subIterators.size()];
       this.innerPositions = new int[subIterators.size() * 2];
-      this.allowOverlaps = allowOverlaps;
 
       for (int i = 0; i < subIterators.size(); i++) {
         this.subIterators[i] = subIterators.get(i);
@@ -143,12 +137,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
       while (this.queue.size() == subIterators.length && queue.top().start() == start) {
         IntervalIterator it = queue.pop();
         if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
-          if (allowOverlaps == false) {
-            while (hasOverlaps(it)) {
-              if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
-                return start = end = IntervalIterator.NO_MORE_INTERVALS;
-            }
-          }
           queue.add(it);
           updateRightExtreme(it);
         }
@@ -164,13 +152,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
           return start;
         IntervalIterator it = queue.pop();
         if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
-          if (allowOverlaps == false) {
-            while (hasOverlaps(it)) {
-              if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
-                return start;
-              }
-            }
-          }
           queue.add(it);
           updateRightExtreme(it);
         }
@@ -202,39 +183,14 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
     protected void reset() throws IOException {
       queueEnd = start = end = -1;
       this.queue.clear();
-      loop: for (IntervalIterator it : subIterators) {
+      for (IntervalIterator it : subIterators) {
         if (it.nextInterval() == NO_MORE_INTERVALS) {
           break;
         }
-        if (allowOverlaps == false) {
-          while (hasOverlaps(it)) {
-            if (it.nextInterval() == NO_MORE_INTERVALS) {
-              break loop;
-            }
-          }
-        }
         queue.add(it);
         updateRightExtreme(it);
       }
     }
 
-    private boolean hasOverlaps(IntervalIterator candidate) {
-      for (IntervalIterator it : queue) {
-        if (it.start() < candidate.start()) {
-          if (it.end() >= candidate.start()) {
-            return true;
-          }
-          continue;
-        }
-        if (it.start() == candidate.start()) {
-          return true;
-        }
-        if (it.start() <= candidate.end()) {
-          return true;
-        }
-      }
-      return false;
-    }
-
   }
 }
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
index a8998ef..61757b1 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
@@ -71,7 +71,8 @@ public class TestIntervalQuery extends LuceneTestCase {
       "w2 w1 w3 w2 w4",
       "coordinate genome mapping research",
       "coordinate genome research",
-      "greater new york"
+      "greater new york",
+      "x x x x x intend x x x message x x x message x x x addressed x x"
   };
 
   private void checkHits(Query query, int[] results) throws IOException {
@@ -220,6 +221,19 @@ public class TestIntervalQuery extends LuceneTestCase {
     checkHits(q, new int[]{3});
   }
 
+  public void testUnorderedNoOverlaps() throws IOException {
+    Query q = new IntervalQuery(field,
+        Intervals.maxgaps(3, Intervals.unorderedNoOverlaps(Intervals.term("addressed"),
+          Intervals.maxgaps(5, Intervals.unorderedNoOverlaps(Intervals.term("message"),
+            Intervals.maxgaps(3, Intervals.unordered(Intervals.term("intend"), Intervals.term("message"))))))));
+    checkHits(q, new int[]{ 9 });
+
+    q = new IntervalQuery(field, Intervals.unorderedNoOverlaps(
+        Intervals.term("w2"),
+        Intervals.or(Intervals.term("w2"), Intervals.term("w3"))));
+    checkHits(q, new int[]{ 0, 1, 2, 3, 5 });
+  }
+
   public void testNestedOrInUnorderedMaxGaps() throws IOException {
     Query q = new IntervalQuery(field, Intervals.maxgaps(1, Intervals.unordered(
             Intervals.or(Intervals.term("coordinate"), Intervals.phrase("coordinate", "genome")),
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
index ea80d5f..8bf7f8a 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
@@ -401,7 +401,7 @@ public class TestIntervals extends LuceneTestCase {
   }
 
   public void testUnorderedDistinct() throws IOException {
-    checkIntervals(Intervals.unordered(false, Intervals.term("pease"), Intervals.term("pease")),
+    checkIntervals(Intervals.unorderedNoOverlaps(Intervals.term("pease"), Intervals.term("pease")),
         "field1", 3, new int[][]{
             {},
             { 0, 3, 3, 6 },
@@ -410,18 +410,18 @@ public class TestIntervals extends LuceneTestCase {
             { 0, 3, 3, 6 },
             {}
         });
-    checkIntervals(Intervals.unordered(false,
+    checkIntervals(Intervals.unorderedNoOverlaps(
         Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
         Intervals.term("porridge")),
         "field1", 3, new int[][]{
             {},
-            { 1, 4, 4, 17 },
+            { 1, 4, 2, 7, 4, 17 },
             { 1, 5, 4, 7 },
             {},
-            { 1, 4, 4, 17 },
+            { 1, 4, 2, 7, 4, 17 },
             {}
         });
-    checkIntervals(Intervals.unordered(false,
+    checkIntervals(Intervals.unorderedNoOverlaps(
         Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
         Intervals.term("porridge")),
         "field2", 1, new int[][]{
@@ -432,6 +432,16 @@ public class TestIntervals extends LuceneTestCase {
             { 0, 3 },
             {}
         });
+    checkIntervals(Intervals.unorderedNoOverlaps(
+        Intervals.term("porridge"),
+        Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"))), "field1", 3, new int[][]{
+        {},
+        { 1, 4, 4, 7 },
+        { 1, 4, 4, 7 },
+        {},
+        { 1, 4, 4, 7 },
+        {}
+    });
   }
 
   public void testContainedBy() throws IOException {
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java
index 7f64cd4..c97dfbb 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java
@@ -45,9 +45,14 @@ public class TestSimplifications extends LuceneTestCase {
   }
 
   public void testUnorderedOverlaps() {
-    // UNORDERED_NO_OVERLAPS(term) => term
-    IntervalsSource actual = Intervals.unordered(false, Intervals.term("term"));
-    assertEquals(Intervals.term("term"), actual);
+    // UNORDERED_NO_OVERLAPS(term, term) => ORDERED(term, term)
+    IntervalsSource actual = Intervals.unorderedNoOverlaps(Intervals.term("term"), Intervals.term("term"));
+    assertEquals(Intervals.ordered(Intervals.term("term"), Intervals.term("term")), actual);
+  }
+
+  public void testDisjunctionSingleton() {
+    IntervalsSource actual = Intervals.or(Intervals.term("a"));
+    assertEquals(Intervals.term("a"), actual);
   }
 
   public void testDisjunctionRemovesDuplicates() {