You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2019/06/07 14:07:45 UTC
[lucene-solr] branch master updated: LUCENE-8828: Make
unorderedNoOverlaps a separate IntervalsSource
This is an automated email from the ASF dual-hosted git repository.
romseygeek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 67677d9 LUCENE-8828: Make unorderedNoOverlaps a separate IntervalsSource
67677d9 is described below
commit 67677d995e8fca6214844b0eb814d06138bce0a2
Author: Alan Woodward <ro...@apache.org>
AuthorDate: Fri Jun 7 14:45:56 2019 +0100
LUCENE-8828: Make unorderedNoOverlaps a separate IntervalsSource
---
lucene/CHANGES.txt | 3 +
.../intervals/DisjunctionIntervalsSource.java | 18 +++++-
.../apache/lucene/search/intervals/Intervals.java | 21 ++-----
.../NoRewriteDisjunctionIntervalsSource.java | 33 -----------
.../search/intervals/UnorderedIntervalsSource.java | 68 ++++------------------
.../lucene/search/intervals/TestIntervalQuery.java | 16 ++++-
.../lucene/search/intervals/TestIntervals.java | 20 +++++--
.../search/intervals/TestSimplifications.java | 11 +++-
8 files changed, 75 insertions(+), 115 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index cb27213..b476b73 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,9 @@ Bug Fixes
* LUCENE-8804: Forbid calls to putAttribute on frozen FieldType instances.
(Vamshi Vijay Nakkirtha via Adrien Grand)
+* LUCENE-8828: Removes the buggy 'disallow overlaps' boolean from Intervals.unordered(),
+ and replaces it with a new Intervals.unorderedNoOverlaps() method (Alan Woodward)
+
Improvements
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java
index 350a29d..5e81d99 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java
@@ -20,6 +20,7 @@ package org.apache.lucene.search.intervals;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
@@ -38,9 +39,19 @@ import org.apache.lucene.util.PriorityQueue;
class DisjunctionIntervalsSource extends IntervalsSource {
final Collection<IntervalsSource> subSources;
+ final boolean pullUpDisjunctions;
- public DisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
+ static IntervalsSource create(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
+ subSources = simplify(subSources);
+ if (subSources.size() == 1) {
+ return subSources.iterator().next();
+ }
+ return new DisjunctionIntervalsSource(subSources, pullUpDisjunctions);
+ }
+
+ private DisjunctionIntervalsSource(Collection<IntervalsSource> subSources, boolean pullUpDisjunctions) {
this.subSources = simplify(subSources);
+ this.pullUpDisjunctions = pullUpDisjunctions;
}
private static Collection<IntervalsSource> simplify(Collection<IntervalsSource> sources) {
@@ -120,7 +131,10 @@ class DisjunctionIntervalsSource extends IntervalsSource {
@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
- return subSources;
+ if (pullUpDisjunctions) {
+ return subSources;
+ }
+ return Collections.singletonList(this);
}
static class DisjunctionIntervalIterator extends IntervalIterator {
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
index 591fbb1..d579c6f 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
@@ -138,13 +138,7 @@ public final class Intervals {
* @param subSources the sources to combine
*/
public static IntervalsSource or(boolean rewrite, List<IntervalsSource> subSources) {
- if (subSources.size() == 1) {
- return subSources.get(0);
- }
- if (rewrite) {
- return new DisjunctionIntervalsSource(subSources);
- }
- return new NoRewriteDisjunctionIntervalsSource(subSources);
+ return DisjunctionIntervalsSource.create(subSources, rewrite);
}
/**
@@ -227,19 +221,16 @@ public final class Intervals {
* @param subSources an unordered set of {@link IntervalsSource}s
*/
public static IntervalsSource unordered(IntervalsSource... subSources) {
- return unordered(true, subSources);
+ return UnorderedIntervalsSource.build(Arrays.asList(subSources));
}
/**
- * Create an unordered {@link IntervalsSource}
+ * Create an unordered {@link IntervalsSource} allowing no overlaps between subsources
*
- * Returns intervals in which all the subsources appear.
- *
- * @param subSources an unordered set of {@link IntervalsSource}s
- * @param allowOverlaps whether or not the sources should be allowed to overlap in a hit
+ * Returns intervals in which both the subsources appear and do not overlap.
*/
- public static IntervalsSource unordered(boolean allowOverlaps, IntervalsSource... subSources) {
- return UnorderedIntervalsSource.build(Arrays.asList(subSources), allowOverlaps);
+ public static IntervalsSource unorderedNoOverlaps(IntervalsSource a, IntervalsSource b) {
+ return Intervals.or(Intervals.ordered(a, b), Intervals.ordered(b, a));
}
/**
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java
deleted file mode 100644
index e273f19..0000000
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/NoRewriteDisjunctionIntervalsSource.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.search.intervals;
-
-import java.util.Collection;
-import java.util.Collections;
-
-class NoRewriteDisjunctionIntervalsSource extends DisjunctionIntervalsSource {
-
- public NoRewriteDisjunctionIntervalsSource(Collection<IntervalsSource> subSources) {
- super(subSources);
- }
-
- @Override
- public Collection<IntervalsSource> pullUpDisjunctions() {
- return Collections.singletonList(this);
- }
-}
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java
index adf9cc0..aaf0184 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/UnorderedIntervalsSource.java
@@ -29,17 +29,17 @@ import org.apache.lucene.util.PriorityQueue;
class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
- static IntervalsSource build(List<IntervalsSource> sources, boolean allowOverlaps) {
+ static IntervalsSource build(List<IntervalsSource> sources) {
if (sources.size() == 1) {
return sources.get(0);
}
- return new UnorderedIntervalsSource(flatten(sources, allowOverlaps), allowOverlaps);
+ return new UnorderedIntervalsSource(flatten(sources));
}
- private static List<IntervalsSource> flatten(List<IntervalsSource> sources, boolean allowOverlaps) {
+ private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
List<IntervalsSource> flattened = new ArrayList<>();
for (IntervalsSource s : sources) {
- if (s instanceof UnorderedIntervalsSource && ((UnorderedIntervalsSource)s).allowOverlaps == allowOverlaps) {
+ if (s instanceof UnorderedIntervalsSource) {
flattened.addAll(((UnorderedIntervalsSource)s).subSources);
}
else {
@@ -49,16 +49,13 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
return flattened;
}
- private final boolean allowOverlaps;
-
- private UnorderedIntervalsSource(List<IntervalsSource> sources, boolean allowOverlaps) {
+ private UnorderedIntervalsSource(List<IntervalsSource> sources) {
super(sources, true);
- this.allowOverlaps = allowOverlaps;
}
@Override
protected IntervalIterator combine(List<IntervalIterator> iterators) {
- return new UnorderedIntervalIterator(iterators, allowOverlaps);
+ return new UnorderedIntervalIterator(iterators);
}
@Override
@@ -72,25 +69,24 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
- return Disjunctions.pullUp(subSources, ss -> new UnorderedIntervalsSource(ss, allowOverlaps));
+ return Disjunctions.pullUp(subSources, UnorderedIntervalsSource::new);
}
@Override
public int hashCode() {
- return Objects.hash(this.subSources, this.allowOverlaps);
+ return Objects.hash(this.subSources);
}
@Override
public boolean equals(Object other) {
if (other instanceof UnorderedIntervalsSource == false) return false;
UnorderedIntervalsSource o = (UnorderedIntervalsSource) other;
- return Objects.equals(this.subSources, o.subSources) &&
- Objects.equals(this.allowOverlaps, o.allowOverlaps);
+ return Objects.equals(this.subSources, o.subSources);
}
@Override
public String toString() {
- return (allowOverlaps ? "UNORDERED(" : "UNORDERED_NO_OVERLAPS(") +
+ return "UNORDERED(" +
subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")";
}
@@ -99,11 +95,10 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
private final PriorityQueue<IntervalIterator> queue;
private final IntervalIterator[] subIterators;
private final int[] innerPositions;
- private final boolean allowOverlaps;
int start = -1, end = -1, firstEnd, queueEnd;
- UnorderedIntervalIterator(List<IntervalIterator> subIterators, boolean allowOverlaps) {
+ UnorderedIntervalIterator(List<IntervalIterator> subIterators) {
super(subIterators);
this.queue = new PriorityQueue<IntervalIterator>(subIterators.size()) {
@Override
@@ -113,7 +108,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
};
this.subIterators = new IntervalIterator[subIterators.size()];
this.innerPositions = new int[subIterators.size() * 2];
- this.allowOverlaps = allowOverlaps;
for (int i = 0; i < subIterators.size(); i++) {
this.subIterators[i] = subIterators.get(i);
@@ -143,12 +137,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
while (this.queue.size() == subIterators.length && queue.top().start() == start) {
IntervalIterator it = queue.pop();
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
- if (allowOverlaps == false) {
- while (hasOverlaps(it)) {
- if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
- return start = end = IntervalIterator.NO_MORE_INTERVALS;
- }
- }
queue.add(it);
updateRightExtreme(it);
}
@@ -164,13 +152,6 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
return start;
IntervalIterator it = queue.pop();
if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
- if (allowOverlaps == false) {
- while (hasOverlaps(it)) {
- if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
- return start;
- }
- }
- }
queue.add(it);
updateRightExtreme(it);
}
@@ -202,39 +183,14 @@ class UnorderedIntervalsSource extends ConjunctionIntervalsSource {
protected void reset() throws IOException {
queueEnd = start = end = -1;
this.queue.clear();
- loop: for (IntervalIterator it : subIterators) {
+ for (IntervalIterator it : subIterators) {
if (it.nextInterval() == NO_MORE_INTERVALS) {
break;
}
- if (allowOverlaps == false) {
- while (hasOverlaps(it)) {
- if (it.nextInterval() == NO_MORE_INTERVALS) {
- break loop;
- }
- }
- }
queue.add(it);
updateRightExtreme(it);
}
}
- private boolean hasOverlaps(IntervalIterator candidate) {
- for (IntervalIterator it : queue) {
- if (it.start() < candidate.start()) {
- if (it.end() >= candidate.start()) {
- return true;
- }
- continue;
- }
- if (it.start() == candidate.start()) {
- return true;
- }
- if (it.start() <= candidate.end()) {
- return true;
- }
- }
- return false;
- }
-
}
}
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
index a8998ef..61757b1 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
@@ -71,7 +71,8 @@ public class TestIntervalQuery extends LuceneTestCase {
"w2 w1 w3 w2 w4",
"coordinate genome mapping research",
"coordinate genome research",
- "greater new york"
+ "greater new york",
+ "x x x x x intend x x x message x x x message x x x addressed x x"
};
private void checkHits(Query query, int[] results) throws IOException {
@@ -220,6 +221,19 @@ public class TestIntervalQuery extends LuceneTestCase {
checkHits(q, new int[]{3});
}
+ public void testUnorderedNoOverlaps() throws IOException {
+ Query q = new IntervalQuery(field,
+ Intervals.maxgaps(3, Intervals.unorderedNoOverlaps(Intervals.term("addressed"),
+ Intervals.maxgaps(5, Intervals.unorderedNoOverlaps(Intervals.term("message"),
+ Intervals.maxgaps(3, Intervals.unordered(Intervals.term("intend"), Intervals.term("message"))))))));
+ checkHits(q, new int[]{ 9 });
+
+ q = new IntervalQuery(field, Intervals.unorderedNoOverlaps(
+ Intervals.term("w2"),
+ Intervals.or(Intervals.term("w2"), Intervals.term("w3"))));
+ checkHits(q, new int[]{ 0, 1, 2, 3, 5 });
+ }
+
public void testNestedOrInUnorderedMaxGaps() throws IOException {
Query q = new IntervalQuery(field, Intervals.maxgaps(1, Intervals.unordered(
Intervals.or(Intervals.term("coordinate"), Intervals.phrase("coordinate", "genome")),
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
index ea80d5f..8bf7f8a 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
@@ -401,7 +401,7 @@ public class TestIntervals extends LuceneTestCase {
}
public void testUnorderedDistinct() throws IOException {
- checkIntervals(Intervals.unordered(false, Intervals.term("pease"), Intervals.term("pease")),
+ checkIntervals(Intervals.unorderedNoOverlaps(Intervals.term("pease"), Intervals.term("pease")),
"field1", 3, new int[][]{
{},
{ 0, 3, 3, 6 },
@@ -410,18 +410,18 @@ public class TestIntervals extends LuceneTestCase {
{ 0, 3, 3, 6 },
{}
});
- checkIntervals(Intervals.unordered(false,
+ checkIntervals(Intervals.unorderedNoOverlaps(
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
Intervals.term("porridge")),
"field1", 3, new int[][]{
{},
- { 1, 4, 4, 17 },
+ { 1, 4, 2, 7, 4, 17 },
{ 1, 5, 4, 7 },
{},
- { 1, 4, 4, 17 },
+ { 1, 4, 2, 7, 4, 17 },
{}
});
- checkIntervals(Intervals.unordered(false,
+ checkIntervals(Intervals.unorderedNoOverlaps(
Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"), Intervals.term("hot")),
Intervals.term("porridge")),
"field2", 1, new int[][]{
@@ -432,6 +432,16 @@ public class TestIntervals extends LuceneTestCase {
{ 0, 3 },
{}
});
+ checkIntervals(Intervals.unorderedNoOverlaps(
+ Intervals.term("porridge"),
+ Intervals.unordered(Intervals.term("pease"), Intervals.term("porridge"))), "field1", 3, new int[][]{
+ {},
+ { 1, 4, 4, 7 },
+ { 1, 4, 4, 7 },
+ {},
+ { 1, 4, 4, 7 },
+ {}
+ });
}
public void testContainedBy() throws IOException {
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java
index 7f64cd4..c97dfbb 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestSimplifications.java
@@ -45,9 +45,14 @@ public class TestSimplifications extends LuceneTestCase {
}
public void testUnorderedOverlaps() {
- // UNORDERED_NO_OVERLAPS(term) => term
- IntervalsSource actual = Intervals.unordered(false, Intervals.term("term"));
- assertEquals(Intervals.term("term"), actual);
+ // UNORDERED_NO_OVERLAPS(term, term) => ORDERED(term, term)
+ IntervalsSource actual = Intervals.unorderedNoOverlaps(Intervals.term("term"), Intervals.term("term"));
+ assertEquals(Intervals.ordered(Intervals.term("term"), Intervals.term("term")), actual);
+ }
+
+ public void testDisjunctionSingleton() {
+ IntervalsSource actual = Intervals.or(Intervals.term("a"));
+ assertEquals(Intervals.term("a"), actual);
}
public void testDisjunctionRemovesDuplicates() {