You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2019/01/07 14:17:41 UTC
[2/2] lucene-solr:master: LUCENE-8622: Minimum-should-match interval
function
LUCENE-8622: Minimum-should-match interval function
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e015afad
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e015afad
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e015afad
Branch: refs/heads/master
Commit: e015afadaa698a0eb39574198dce31b9a7292a62
Parents: 7d34bfd
Author: Alan Woodward <ro...@apache.org>
Authored: Mon Jan 7 13:53:19 2019 +0000
Committer: Alan Woodward <ro...@apache.org>
Committed: Mon Jan 7 14:08:40 2019 +0000
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../intervals/CachingMatchesIterator.java | 132 +++++++
.../lucene/search/intervals/Intervals.java | 7 +
.../MinimizingConjunctionIntervalsSource.java | 122 +-----
.../MinimumShouldMatchIntervalsSource.java | 387 +++++++++++++++++++
.../lucene/search/intervals/TestIntervals.java | 33 ++
6 files changed, 569 insertions(+), 115 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e015afad/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 48f9aa5..7169cf6 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -180,6 +180,9 @@ New Features
* LUCENE-8629: New interval functions: Intervals.before(), Intervals.after(),
Intervals.within() and Intervals.overlapping(). (Alan Woodward)
+* LUCENE-8622: Adds a minimum-should-match interval function that produces intervals
+ spanning a subset of a set of sources. (Alan Woodward)
+
Improvements
* LUCENE-7997: Add BaseSimilarityTestCase to sanity check similarities.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e015afad/lucene/sandbox/src/java/org/apache/lucene/search/intervals/CachingMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/CachingMatchesIterator.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/CachingMatchesIterator.java
new file mode 100644
index 0000000..d522412
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/CachingMatchesIterator.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.intervals;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.FilterMatchesIterator;
+import org.apache.lucene.search.MatchesIterator;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.ArrayUtil;
+
+class CachingMatchesIterator extends FilterMatchesIterator {
+
+ private boolean positioned = false;
+ private int[] posAndOffsets = new int[16];
+ private int count = 0;
+
+ CachingMatchesIterator(MatchesIterator in) {
+ super(in);
+ }
+
+ private void cache() throws IOException {
+ count = 0;
+ MatchesIterator mi = in.getSubMatches();
+ if (mi == null) {
+ count = 1;
+ posAndOffsets[0] = in.startPosition();
+ posAndOffsets[1] = in.endPosition();
+ posAndOffsets[2] = in.startOffset();
+ posAndOffsets[3] = in.endOffset();
+ }
+ else {
+ while (mi.next()) {
+ if (count * 4 >= posAndOffsets.length) {
+ posAndOffsets = ArrayUtil.grow(posAndOffsets, (count + 1) * 4);
+ }
+ posAndOffsets[count * 4] = mi.startPosition();
+ posAndOffsets[count * 4 + 1] = mi.endPosition();
+ posAndOffsets[count * 4 + 2] = mi.startOffset();
+ posAndOffsets[count * 4 + 3] = mi.endOffset();
+ count++;
+ }
+ }
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ if (positioned == false) {
+ positioned = true;
+ }
+ else {
+ cache();
+ }
+ return in.next();
+ }
+
+ int startOffset(int endPos) throws IOException {
+ if (endPosition() <= endPos) {
+ return in.startOffset();
+ }
+ return posAndOffsets[2];
+ }
+
+ int endOffset(int endPos) throws IOException {
+ if (endPosition() <= endPos) {
+ return in.endOffset();
+ }
+ return posAndOffsets[count * 4 + 3];
+ }
+
+ MatchesIterator getSubMatches(int endPos) throws IOException {
+ if (endPosition() <= endPos) {
+ cache();
+ }
+ return new MatchesIterator() {
+
+ int upto = -1;
+
+ @Override
+ public boolean next() {
+ upto++;
+ return upto < count;
+ }
+
+ @Override
+ public int startPosition() {
+ return posAndOffsets[upto * 4];
+ }
+
+ @Override
+ public int endPosition() {
+ return posAndOffsets[upto * 4 + 1];
+ }
+
+ @Override
+ public int startOffset() {
+ return posAndOffsets[upto * 4 + 2];
+ }
+
+ @Override
+ public int endOffset() {
+ return posAndOffsets[upto * 4 + 3];
+ }
+
+ @Override
+ public MatchesIterator getSubMatches() {
+ return null;
+ }
+
+ @Override
+ public Query getQuery() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e015afad/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
index aa5b62c..1b6dbae 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
@@ -259,6 +259,13 @@ public final class Intervals {
}
/**
+ * Return intervals that span combinations of intervals from {@code minShouldMatch} of the sources
+ */
+ public static IntervalsSource atLeast(int minShouldMatch, IntervalsSource... sources) {
+ return new MinimumShouldMatchIntervalsSource(sources, minShouldMatch);
+ }
+
+ /**
* Returns intervals from the source that appear before intervals from the reference
*/
public static IntervalsSource before(IntervalsSource source, IntervalsSource reference) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e015afad/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimizingConjunctionIntervalsSource.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimizingConjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimizingConjunctionIntervalsSource.java
index c509692..6e6f563 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimizingConjunctionIntervalsSource.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimizingConjunctionIntervalsSource.java
@@ -23,11 +23,9 @@ import java.util.List;
import java.util.stream.Collectors;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.FilterMatchesIterator;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;
-import org.apache.lucene.util.ArrayUtil;
/**
* A ConjunctionIntervalsSource that attempts to minimize its internal intervals by
@@ -43,13 +41,13 @@ class MinimizingConjunctionIntervalsSource extends ConjunctionIntervalsSource {
@Override
public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
- List<CacheingMatchesIterator> subs = new ArrayList<>();
+ List<CachingMatchesIterator> subs = new ArrayList<>();
for (IntervalsSource source : subSources) {
MatchesIterator mi = source.matches(field, ctx, doc);
if (mi == null) {
return null;
}
- subs.add(new CacheingMatchesIterator(mi));
+ subs.add(new CachingMatchesIterator(mi));
}
IntervalIterator it = function.apply(subs.stream().map(m -> IntervalMatches.wrapMatches(m, doc)).collect(Collectors.toList()));
if (it.advance(doc) != doc) {
@@ -64,10 +62,10 @@ class MinimizingConjunctionIntervalsSource extends ConjunctionIntervalsSource {
private static class ConjunctionMatchesIterator implements IntervalMatchesIterator {
final IntervalIterator iterator;
- final List<CacheingMatchesIterator> subs;
+ final List<CachingMatchesIterator> subs;
boolean cached = true;
- private ConjunctionMatchesIterator(IntervalIterator iterator, List<CacheingMatchesIterator> subs) {
+ private ConjunctionMatchesIterator(IntervalIterator iterator, List<CachingMatchesIterator> subs) {
this.iterator = iterator;
this.subs = subs;
}
@@ -95,7 +93,7 @@ class MinimizingConjunctionIntervalsSource extends ConjunctionIntervalsSource {
public int startOffset() throws IOException {
int start = Integer.MAX_VALUE;
int endPos = endPosition();
- for (CacheingMatchesIterator s : subs) {
+ for (CachingMatchesIterator s : subs) {
start = Math.min(start, s.startOffset(endPos));
}
return start;
@@ -105,7 +103,7 @@ class MinimizingConjunctionIntervalsSource extends ConjunctionIntervalsSource {
public int endOffset() throws IOException {
int end = 0;
int endPos = endPosition();
- for (CacheingMatchesIterator s : subs) {
+ for (CachingMatchesIterator s : subs) {
end = Math.max(end, s.endOffset(endPos));
}
return end;
@@ -120,7 +118,7 @@ class MinimizingConjunctionIntervalsSource extends ConjunctionIntervalsSource {
public MatchesIterator getSubMatches() throws IOException {
List<MatchesIterator> mis = new ArrayList<>();
int endPos = endPosition();
- for (CacheingMatchesIterator s : subs) {
+ for (CachingMatchesIterator s : subs) {
mis.add(s.getSubMatches(endPos));
}
return MatchesUtils.disjunction(mis);
@@ -132,110 +130,4 @@ class MinimizingConjunctionIntervalsSource extends ConjunctionIntervalsSource {
}
}
- private static class CacheingMatchesIterator extends FilterMatchesIterator {
-
- boolean positioned = false;
- int posAndOffsets[] = new int[16];
- int count = 0;
-
- CacheingMatchesIterator(MatchesIterator in) {
- super(in);
- }
-
- private void cache() throws IOException {
- count = 0;
- MatchesIterator mi = in.getSubMatches();
- if (mi == null) {
- count = 1;
- posAndOffsets[0] = in.startPosition();
- posAndOffsets[1] = in.endPosition();
- posAndOffsets[2] = in.startOffset();
- posAndOffsets[3] = in.endOffset();
- }
- else {
- while (mi.next()) {
- if (count * 4 >= posAndOffsets.length) {
- posAndOffsets = ArrayUtil.grow(posAndOffsets, (count + 1) * 4);
- }
- posAndOffsets[count * 4] = mi.startPosition();
- posAndOffsets[count * 4 + 1] = mi.endPosition();
- posAndOffsets[count * 4 + 2] = mi.startOffset();
- posAndOffsets[count * 4 + 3] = mi.endOffset();
- count++;
- }
- }
- }
-
- @Override
- public boolean next() throws IOException {
- if (positioned == false) {
- positioned = true;
- }
- else {
- cache();
- }
- return in.next();
- }
-
- int startOffset(int endPos) throws IOException {
- if (endPosition() <= endPos) {
- return in.startOffset();
- }
- return posAndOffsets[2];
- }
-
- int endOffset(int endPos) throws IOException {
- if (endPosition() <= endPos) {
- return in.endOffset();
- }
- return posAndOffsets[count * 4 + 3];
- }
-
- MatchesIterator getSubMatches(int endPos) throws IOException {
- if (endPosition() <= endPos) {
- cache();
- }
- return new MatchesIterator() {
-
- int upto = -1;
-
- @Override
- public boolean next() {
- upto++;
- return upto < count;
- }
-
- @Override
- public int startPosition() {
- return posAndOffsets[upto * 4];
- }
-
- @Override
- public int endPosition() {
- return posAndOffsets[upto * 4 + 1];
- }
-
- @Override
- public int startOffset() {
- return posAndOffsets[upto * 4 + 2];
- }
-
- @Override
- public int endOffset() {
- return posAndOffsets[upto * 4 + 3];
- }
-
- @Override
- public MatchesIterator getSubMatches() {
- return null;
- }
-
- @Override
- public Query getQuery() {
- throw new UnsupportedOperationException();
- }
- };
- }
-
- }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e015afad/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimumShouldMatchIntervalsSource.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimumShouldMatchIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimumShouldMatchIntervalsSource.java
new file mode 100644
index 0000000..e97f60c
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimumShouldMatchIntervalsSource.java
@@ -0,0 +1,387 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.intervals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.MatchesIterator;
+import org.apache.lucene.search.MatchesUtils;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.PriorityQueue;
+
+class MinimumShouldMatchIntervalsSource extends IntervalsSource {
+
+ private final IntervalsSource[] sources;
+ private final int minShouldMatch;
+
+ MinimumShouldMatchIntervalsSource(IntervalsSource[] sources, int minShouldMatch) {
+ this.sources = sources;
+ this.minShouldMatch = minShouldMatch;
+ }
+
+ @Override
+ public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
+ List<IntervalIterator> iterators = new ArrayList<>();
+ for (IntervalsSource source : sources) {
+ IntervalIterator it = source.intervals(field, ctx);
+ if (it != null) {
+ iterators.add(it);
+ }
+ }
+ if (iterators.size() < minShouldMatch) {
+ return null;
+ }
+ return new MinimumShouldMatchIntervalIterator(iterators, minShouldMatch);
+ }
+
+ @Override
+ public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
+ Map<IntervalIterator, CachingMatchesIterator> lookup = new IdentityHashMap<>();
+ for (IntervalsSource source : sources) {
+ MatchesIterator mi = source.matches(field, ctx, doc);
+ if (mi != null) {
+ CachingMatchesIterator cmi = new CachingMatchesIterator(mi);
+ lookup.put(IntervalMatches.wrapMatches(cmi, doc), cmi);
+ }
+ }
+ if (lookup.size() < minShouldMatch) {
+ return null;
+ }
+ MinimumShouldMatchIntervalIterator it = new MinimumShouldMatchIntervalIterator(lookup.keySet(), minShouldMatch);
+ if (it.advance(doc) != doc) {
+ return null;
+ }
+ if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
+ return null;
+ }
+ return new MinimumMatchesIterator(it, lookup);
+ }
+
+ @Override
+ public void extractTerms(String field, Set<Term> terms) {
+ for (IntervalsSource source : sources) {
+ source.extractTerms(field, terms);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "AtLeast("
+ + Arrays.stream(sources).map(IntervalsSource::toString).collect(Collectors.joining(","))
+ + "~" + minShouldMatch + ")";
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ MinimumShouldMatchIntervalsSource that = (MinimumShouldMatchIntervalsSource) o;
+ return minShouldMatch == that.minShouldMatch &&
+ Arrays.equals(sources, that.sources);
+ }
+
+ @Override
+ public int hashCode() {
+ int result = Objects.hash(minShouldMatch);
+ result = 31 * result + Arrays.hashCode(sources);
+ return result;
+ }
+
+ // This works as a combination of unordered-AND and OR
+ // First of all, iterators are advanced using a DisjunctionDISIApproximation
+ // Once positioned on a document, nextInterval() is called on each interval, and
+ // those that have intervals are added to an OR-based priority queue (the background queue)
+ // The top-n iterators (where n = minimumShouldMatch) are popped from this queue
+ // and added to an AND-based priority queue (the proximity queue)
+ // Iteration over intervals then proceeds according to the algorithm used by
+ // UnorderedIntervalIterator based on intervals in the proximity queue, with
+ // the one change that when an iterator is popped from the proximity queue, it
+ // is inserted back into the background queue, and replaced by the top iterator
+ // from the background queue.
+ static class MinimumShouldMatchIntervalIterator extends IntervalIterator {
+
+ private final DocIdSetIterator approximation;
+ private final DisiPriorityQueue disiQueue;
+ private final PriorityQueue<IntervalIterator> proximityQueue;
+ private final PriorityQueue<IntervalIterator> backgroundQueue;
+ private final float matchCost;
+ private final int minShouldMatch;
+ private final int[] innerPositions;
+ private final Collection<IntervalIterator> currentIterators = new ArrayList<>();
+
+ private int start, end, queueEnd, firstEnd;
+ private IntervalIterator lead;
+
+ MinimumShouldMatchIntervalIterator(Collection<IntervalIterator> subs, int minShouldMatch) {
+ this.disiQueue = new DisiPriorityQueue(subs.size());
+ float mc = 0;
+ for (IntervalIterator it : subs) {
+ this.disiQueue.add(new DisiWrapper(it));
+ mc += it.matchCost();
+ }
+ this.approximation = new DisjunctionDISIApproximation(disiQueue);
+ this.matchCost = mc;
+ this.minShouldMatch = minShouldMatch;
+ this.innerPositions = new int[minShouldMatch * 2];
+
+ this.proximityQueue = new PriorityQueue<IntervalIterator>(minShouldMatch) {
+ @Override
+ protected boolean lessThan(IntervalIterator a, IntervalIterator b) {
+ return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end());
+ }
+ };
+ this.backgroundQueue = new PriorityQueue<IntervalIterator>(subs.size()) {
+ @Override
+ protected boolean lessThan(IntervalIterator a, IntervalIterator b) {
+ return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start());
+ }
+ };
+ }
+
+ @Override
+ public int start() {
+ return start;
+ }
+
+ @Override
+ public int end() {
+ return end;
+ }
+
+ @Override
+ public int gaps() {
+ int i = 0;
+ for (IntervalIterator it : proximityQueue) {
+ if (it.end() > end) {
+ innerPositions[i * 2] = start;
+ innerPositions[i * 2 + 1] = firstEnd;
+ }
+ else {
+ innerPositions[i * 2] = it.start();
+ innerPositions[i * 2 + 1] = it.end();
+ }
+ i++;
+ }
+ if (proximityQueue.size() < minShouldMatch) {
+ // the leading iterator has been exhausted and removed from the queue
+ innerPositions[i * 2] = start;
+ innerPositions[i * 2 + 1] = firstEnd;
+ }
+ Arrays.sort(innerPositions);
+ int gaps = 0;
+ for (int j = 1; j < minShouldMatch; j++) {
+ gaps += (innerPositions[j * 2] - innerPositions[j * 2 - 1] - 1);
+ }
+ return gaps;
+ }
+
+ @Override
+ public int nextInterval() throws IOException {
+ // first, find a matching interval beyond the current start
+ while (this.proximityQueue.size() == minShouldMatch && proximityQueue.top().start() == start) {
+ IntervalIterator it = proximityQueue.pop();
+ if (it != null && it.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
+ backgroundQueue.add(it);
+ IntervalIterator next = backgroundQueue.pop();
+ assert next != null; // it's just been added!
+ proximityQueue.add(next);
+ updateRightExtreme(next);
+ }
+ }
+ if (this.proximityQueue.size() < minShouldMatch)
+ return start = end = IntervalIterator.NO_MORE_INTERVALS;
+ // then, minimize it
+ do {
+ start = proximityQueue.top().start();
+ firstEnd = proximityQueue.top().end();
+ end = queueEnd;
+ if (proximityQueue.top().end() == end)
+ return start;
+ lead = proximityQueue.pop();
+ if (lead != null) {
+ if (lead.nextInterval() != NO_MORE_INTERVALS) {
+ backgroundQueue.add(lead);
+ }
+ IntervalIterator next = backgroundQueue.pop();
+ if (next != null) {
+ proximityQueue.add(next);
+ updateRightExtreme(next);
+ }
+ }
+ } while (this.proximityQueue.size() == minShouldMatch && end == queueEnd);
+ return start;
+ }
+
+ Collection<IntervalIterator> getCurrentIterators() {
+ currentIterators.clear();
+ currentIterators.add(lead);
+ for (IntervalIterator it : this.proximityQueue) {
+ if (it.end() <= end) {
+ currentIterators.add(it);
+ }
+ }
+ return currentIterators;
+ }
+
+ private void reset() throws IOException {
+ this.proximityQueue.clear();
+ this.backgroundQueue.clear();
+ // First we populate the background queue
+ for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) {
+ if (dw.intervals.nextInterval() != NO_MORE_INTERVALS) {
+ this.backgroundQueue.add(dw.intervals);
+ }
+ }
+ // Then we pop the first minShouldMatch entries and add them to the proximity queue
+ this.queueEnd = -1;
+ for (int i = 0; i < minShouldMatch; i++) {
+ IntervalIterator it = this.backgroundQueue.pop();
+ if (it == null) {
+ break;
+ }
+ this.proximityQueue.add(it);
+ updateRightExtreme(it);
+ }
+ start = end = -1;
+ }
+
+ private void updateRightExtreme(IntervalIterator it) {
+ int itEnd = it.end();
+ if (itEnd > queueEnd) {
+ queueEnd = itEnd;
+ }
+ }
+
+ @Override
+ public float matchCost() {
+ return matchCost;
+ }
+
+ @Override
+ public int docID() {
+ return approximation.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ int doc = approximation.nextDoc();
+ reset();
+ return doc;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ int doc = approximation.advance(target);
+ reset();
+ return doc;
+ }
+
+ @Override
+ public long cost() {
+ return approximation.cost();
+ }
+ }
+
+ static class MinimumMatchesIterator implements IntervalMatchesIterator {
+
+ boolean cached = true;
+ final MinimumShouldMatchIntervalIterator iterator;
+ final Map<IntervalIterator, CachingMatchesIterator> lookup;
+
+ MinimumMatchesIterator(MinimumShouldMatchIntervalIterator iterator,
+ Map<IntervalIterator, CachingMatchesIterator> lookup) {
+ this.iterator = iterator;
+ this.lookup = lookup;
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ if (cached) {
+ cached = false;
+ return true;
+ }
+ return iterator.nextInterval() != IntervalIterator.NO_MORE_INTERVALS;
+ }
+
+ @Override
+ public int startPosition() {
+ return iterator.start();
+ }
+
+ @Override
+ public int endPosition() {
+ return iterator.end();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ int start = Integer.MAX_VALUE;
+ int endPos = endPosition();
+ for (IntervalIterator it : iterator.getCurrentIterators()) {
+ CachingMatchesIterator cms = lookup.get(it);
+ start = Math.min(start, cms.startOffset(endPos));
+ }
+ return start;
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ int end = 0;
+ int endPos = endPosition();
+ for (IntervalIterator it : iterator.getCurrentIterators()) {
+ CachingMatchesIterator cms = lookup.get(it);
+ end = Math.max(end, cms.endOffset(endPos));
+ }
+ return end;
+ }
+
+ @Override
+ public int gaps() {
+ return iterator.gaps();
+ }
+
+ @Override
+ public MatchesIterator getSubMatches() throws IOException {
+ List<MatchesIterator> mis = new ArrayList<>();
+ int endPos = endPosition();
+ for (IntervalIterator it : iterator.getCurrentIterators()) {
+ CachingMatchesIterator cms = lookup.get(it);
+ mis.add(cms.getSubMatches(endPos));
+ }
+ return MatchesUtils.disjunction(mis);
+ }
+
+ @Override
+ public Query getQuery() {
+ return null;
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e015afad/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
index 3276caa..d1c2479 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
@@ -533,6 +533,39 @@ public class TestIntervals extends LuceneTestCase {
assertMatch(mi, 4, 8, 12, 26);
}
+ public void testMinimumShouldMatch() throws IOException {
+ IntervalsSource source = Intervals.atLeast(3,
+ Intervals.term("porridge"), Intervals.term("hot"), Intervals.term("twelve"),
+ Intervals.term("nine"), Intervals.term("pease"));
+ checkIntervals(source, "field1", 3, new int[][]{
+ {},
+ {0, 2, 1, 3, 2, 4, 6, 11, 7, 17},
+ {3, 5, 4, 6, 5, 7, 6, 11, 7, 21},
+ {},
+ {0, 2, 1, 3, 2, 4, 6, 11, 7, 17, 11, 21},
+ {}
+ });
+
+ assertGaps(source, 1, "field1", new int[]{0, 0, 0, 3, 8});
+
+ MatchesIterator mi = getMatches(source, 1, "field1");
+ assertMatch(mi, 0, 2, 0, 18);
+ MatchesIterator subs = mi.getSubMatches();
+ assertNotNull(subs);
+ assertMatch(subs, 0, 0, 0, 5);
+ assertMatch(subs, 1, 1, 6, 14);
+ assertMatch(subs, 2, 2, 15, 18);
+ assertFalse(subs.next());
+ assertTrue(mi.next());
+ assertTrue(mi.next());
+ assertMatch(mi, 6, 11, 41, 71);
+ subs = mi.getSubMatches();
+ assertMatch(subs, 6, 6, 41, 46);
+ assertMatch(subs, 7, 7, 47, 55);
+ assertMatch(subs, 11, 11, 67, 71);
+
+ }
+
public void testDefinedGaps() throws IOException {
IntervalsSource source = Intervals.phrase(
Intervals.term("pease"),