You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2018/04/11 09:47:29 UTC
[1/3] lucene-solr:master: LUCENE-8229: Add Weight.matches() to
iterate over match positions
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x 2376b6c51 -> 502fd4bf1
refs/heads/master 5b250b4a4 -> 040a9601b
LUCENE-8229: Add Weight.matches() to iterate over match positions
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/040a9601
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/040a9601
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/040a9601
Branch: refs/heads/master
Commit: 040a9601b1b346391ad37e5a0a4f2f598e72d26e
Parents: 5b250b4
Author: Alan Woodward <ro...@apache.org>
Authored: Wed Apr 11 09:43:27 2018 +0100
Committer: Alan Woodward <ro...@apache.org>
Committed: Wed Apr 11 09:43:27 2018 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 5 +
.../org/apache/lucene/search/BooleanWeight.java | 35 ++
.../lucene/search/ConstantScoreQuery.java | 5 +
.../search/DisjunctionMatchesIterator.java | 171 +++++++
.../lucene/search/DisjunctionMaxQuery.java | 12 +
.../lucene/search/DocValuesRewriteMethod.java | 20 +-
.../org/apache/lucene/search/FilterWeight.java | 4 +
.../lucene/search/IndexOrDocValuesQuery.java | 6 +
.../org/apache/lucene/search/LRUQueryCache.java | 5 +
.../java/org/apache/lucene/search/Matches.java | 146 ++++++
.../apache/lucene/search/MatchesIterator.java | 79 ++++
.../MultiTermQueryConstantScoreWrapper.java | 14 +-
.../org/apache/lucene/search/SynonymQuery.java | 13 +-
.../apache/lucene/search/TermInSetQuery.java | 9 +
.../lucene/search/TermMatchesIterator.java | 77 ++++
.../org/apache/lucene/search/TermQuery.java | 21 +-
.../java/org/apache/lucene/search/Weight.java | 29 ++
.../lucene/search/TestMatchesIterator.java | 440 +++++++++++++++++++
.../search/join/ToParentBlockJoinQuery.java | 23 +
.../queries/function/FunctionScoreQuery.java | 6 +
.../org/apache/lucene/search/CoveringQuery.java | 22 +
.../apache/lucene/search/AssertingMatches.java | 43 ++
.../lucene/search/AssertingMatchesIterator.java | 77 ++++
.../apache/lucene/search/AssertingWeight.java | 8 +
.../org/apache/lucene/search/CheckHits.java | 55 ++-
.../org/apache/lucene/search/QueryUtils.java | 2 +
26 files changed, 1317 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 79b984a..d5c3f13 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -134,6 +134,11 @@ New Features
soft deletes if the reader is opened form a directory. (Simon Willnauer,
Mike McCandless, Uwe Schindler, Adrien Grand)
+* LUCENE-8229: Add a method Weight.matches(LeafReaderContext, doc) that returns
+ an iterator over matching positions for a given query and document. This
+ allows exact hit extraction and will enable implementation of accurate
+ highlighters. (Alan Woodward, Adrien Grand, David Smiley)
+
Bug Fixes
* LUCENE-8234: Fixed bug in how spatial relationship is computed for
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
index fffdd09..1fddd7c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
@@ -119,6 +119,41 @@ final class BooleanWeight extends Weight {
}
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ final int minShouldMatch = query.getMinimumNumberShouldMatch();
+ List<Matches> matches = new ArrayList<>();
+ int shouldMatchCount = 0;
+ Iterator<Weight> wIt = weights.iterator();
+ Iterator<BooleanClause> cIt = query.clauses().iterator();
+ while (wIt.hasNext()) {
+ Weight w = wIt.next();
+ BooleanClause bc = cIt.next();
+ Matches m = w.matches(context, doc);
+ if (bc.isProhibited()) {
+ if (m != null) {
+ return null;
+ }
+ }
+ if (bc.isRequired()) {
+ if (m == null) {
+ return null;
+ }
+ matches.add(m);
+ }
+ if (bc.getOccur() == Occur.SHOULD) {
+ if (m != null) {
+ matches.add(m);
+ shouldMatchCount++;
+ }
+ }
+ }
+ if (shouldMatchCount < minShouldMatch) {
+ return null;
+ }
+ return Matches.fromSubMatches(matches);
+ }
+
static BulkScorer disableScoring(final BulkScorer scorer) {
return new BulkScorer() {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
index 464cde6..d696c21 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
@@ -159,6 +159,11 @@ public final class ConstantScoreQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ return innerWeight.matches(context, doc);
+ }
+
+ @Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
new file mode 100644
index 0000000..37770d2
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * A {@link MatchesIterator} that combines matches from a set of sub-iterators
+ *
+ * Matches are sorted by their start positions, and then by their end positions, so that
+ * prefixes sort first. Matches may overlap, or be duplicated if they appear in more
+ * than one of the sub-iterators.
+ */
+final class DisjunctionMatchesIterator implements MatchesIterator {
+
+ /**
+ * Create a {@link DisjunctionMatchesIterator} over a list of terms
+ *
+ * Only terms that have at least one match in the given document will be included
+ */
+ static MatchesIterator fromTerms(LeafReaderContext context, int doc, String field, List<Term> terms) throws IOException {
+ Objects.requireNonNull(field);
+ for (Term term : terms) {
+ if (Objects.equals(field, term.field()) == false) {
+ throw new IllegalArgumentException("Tried to generate iterator from terms in multiple fields: expected [" + field + "] but got [" + term.field() + "]");
+ }
+ }
+ return fromTermsEnum(context, doc, field, asBytesRefIterator(terms));
+ }
+
+ private static BytesRefIterator asBytesRefIterator(List<Term> terms) {
+ return new BytesRefIterator() {
+ int i = 0;
+ @Override
+ public BytesRef next() {
+ if (i >= terms.size())
+ return null;
+ return terms.get(i++).bytes();
+ }
+ };
+ }
+
+ /**
+ * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
+ *
+ * Only terms that have at least one match in the given document will be included
+ */
+ static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, String field, BytesRefIterator terms) throws IOException {
+ Objects.requireNonNull(field);
+ List<MatchesIterator> mis = new ArrayList<>();
+ Terms t = context.reader().terms(field);
+ if (t == null)
+ return null;
+ TermsEnum te = t.iterator();
+ PostingsEnum reuse = null;
+ for (BytesRef term = terms.next(); term != null; term = terms.next()) {
+ if (te.seekExact(term)) {
+ PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
+ if (pe.advance(doc) == doc) {
+ // TODO do we want to use the copied term here, or instead create a label that associates all of the TMIs with a single term?
+ mis.add(new TermMatchesIterator(BytesRef.deepCopyOf(term), pe));
+ reuse = null;
+ }
+ else {
+ reuse = pe;
+ }
+ }
+ }
+ if (mis.size() == 0)
+ return null;
+ if (mis.size() == 1)
+ return mis.get(0);
+ return new DisjunctionMatchesIterator(mis);
+ }
+
+ static MatchesIterator fromSubIterators(List<MatchesIterator> mis) throws IOException {
+ if (mis.size() == 0)
+ return null;
+ if (mis.size() == 1)
+ return mis.get(0);
+ return new DisjunctionMatchesIterator(mis);
+ }
+
+ private final PriorityQueue<MatchesIterator> queue;
+
+ private boolean started = false;
+
+ private DisjunctionMatchesIterator(List<MatchesIterator> matches) throws IOException {
+ queue = new PriorityQueue<MatchesIterator>(matches.size()){
+ @Override
+ protected boolean lessThan(MatchesIterator a, MatchesIterator b) {
+ return a.startPosition() < b.startPosition() ||
+ (a.startPosition() == b.startPosition() && a.endPosition() < b.endPosition()) ||
+ (a.startPosition() == b.startPosition() && a.endPosition() == b.endPosition() && a.term().compareTo(b.term()) < 0);
+ }
+ };
+ for (MatchesIterator mi : matches) {
+ if (mi.next()) {
+ queue.add(mi);
+ }
+ }
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ if (started == false) {
+ return started = true;
+ }
+ if (queue.top().next() == false) {
+ queue.pop();
+ }
+ if (queue.size() > 0) {
+ queue.updateTop();
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public int startPosition() {
+ return queue.top().startPosition();
+ }
+
+ @Override
+ public int endPosition() {
+ return queue.top().endPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return queue.top().startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return queue.top().endOffset();
+ }
+
+ @Override
+ public BytesRef term() {
+ return queue.top().term();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
index 1e67cb1..5b99c3c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
@@ -118,6 +118,18 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
}
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ List<Matches> mis = new ArrayList<>();
+ for (Weight weight : weights) {
+ Matches mi = weight.matches(context, doc);
+ if (mi != null) {
+ mis.add(mi);
+ }
+ }
+ return Matches.fromSubMatches(mis);
+ }
+
/** Create the scorer used to score our associated DisjunctionMaxQuery */
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
index 5d59198..99720b7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
@@ -74,11 +74,16 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
+
@Override
- public Scorer scorer(LeafReaderContext context) throws IOException {
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
- TermsEnum termsEnum = query.getTermsEnum(new Terms() {
-
+ return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, getTermsEnum(fcsi)));
+ }
+
+ private TermsEnum getTermsEnum(SortedSetDocValues fcsi) throws IOException {
+ return query.getTermsEnum(new Terms() {
+
@Override
public TermsEnum iterator() throws IOException {
return fcsi.termsEnum();
@@ -118,13 +123,18 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
public boolean hasPositions() {
return false;
}
-
+
@Override
public boolean hasPayloads() {
return false;
}
});
-
+ }
+
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
+ TermsEnum termsEnum = getTermsEnum(fcsi);
assert termsEnum != null;
if (termsEnum.next() == null) {
// no matching terms
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
index 925c953..8a2b57b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
@@ -75,4 +75,8 @@ public abstract class FilterWeight extends Weight {
return in.scorer(context);
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ return in.matches(context, doc);
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
index f89924d..d69421e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
@@ -120,6 +120,12 @@ public final class IndexOrDocValuesQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ // We need to check a single doc, so the dv query should perform better
+ return dvWeight.matches(context, doc);
+ }
+
+ @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
// We need to check a single doc, so the dv query should perform better
return dvWeight.explain(context, doc);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
index 72239e8..9391afd 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -678,6 +678,11 @@ public class LRUQueryCache implements QueryCache, Accountable {
in.extractTerms(terms);
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ return in.matches(context, doc);
+ }
+
private boolean cacheEntryHasReasonableWorstCaseSize(int maxDoc) {
// The worst-case (dense) is a bit set which needs one bit per document
final long worstCaseRamUsage = maxDoc / 8;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/Matches.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/Matches.java b/lucene/core/src/java/org/apache/lucene/search/Matches.java
new file mode 100644
index 0000000..3670563
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/Matches.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Reports the positions and optionally offsets of all matching terms in a query
+ * for a single document
+ *
+ * To obtain a {@link MatchesIterator} for a particular field, call {@link #getMatches(String)}.
+ * Note that you can call {@link #getMatches(String)} multiple times to retrieve new
+ * iterators, but it is not thread-safe.
+ */
+public interface Matches extends Iterable<String> {
+
+ /**
+ * Returns a {@link MatchesIterator} over the matches for a single field,
+ * or {@code null} if there are no matches in that field.
+ */
+ MatchesIterator getMatches(String field) throws IOException;
+
+ /**
+ * Indicates a match with no term positions, for example on a Point or DocValues field,
+ * or a field indexed as docs and freqs only
+ */
+ Matches MATCH_WITH_NO_TERMS = new Matches() {
+ @Override
+ public Iterator<String> iterator() {
+ return Collections.emptyIterator();
+ }
+
+ @Override
+ public MatchesIterator getMatches(String field) {
+ return null;
+ }
+ };
+
+ /**
+ * Amalgamate a collection of {@link Matches} into a single object
+ */
+ static Matches fromSubMatches(List<Matches> subMatches) {
+ if (subMatches == null || subMatches.size() == 0) {
+ return null;
+ }
+ List<Matches> sm = subMatches.stream().filter(m -> m != MATCH_WITH_NO_TERMS).collect(Collectors.toList());
+ if (sm.size() == 0) {
+ return MATCH_WITH_NO_TERMS;
+ }
+ if (sm.size() == 1) {
+ return sm.get(0);
+ }
+ Set<String> fields = new HashSet<>();
+ for (Matches m : sm) {
+ for (String field : m) {
+ fields.add(field);
+ }
+ }
+ return new Matches() {
+ @Override
+ public MatchesIterator getMatches(String field) throws IOException {
+ List<MatchesIterator> subIterators = new ArrayList<>();
+ for (Matches m : sm) {
+ MatchesIterator it = m.getMatches(field);
+ if (it != null) {
+ subIterators.add(it);
+ }
+ }
+ return DisjunctionMatchesIterator.fromSubIterators(subIterators);
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return fields.iterator();
+ }
+ };
+ }
+
+ /**
+ * A functional interface that supplies a {@link MatchesIterator}
+ */
+ @FunctionalInterface
+ interface MatchesIteratorSupplier {
+ /** Return a new {@link MatchesIterator} */
+ MatchesIterator get() throws IOException;
+ }
+
+ /**
+ * Create a Matches for a single field
+ */
+ static Matches forField(String field, MatchesIteratorSupplier mis) throws IOException {
+
+ // The indirection here, using a Supplier object rather than a MatchesIterator
+ // directly, is to allow for multiple calls to Matches.getMatches() to return
+ // new iterators. We still need to call MatchesIteratorSupplier.get() eagerly
+ // to work out if we have a hit or not.
+
+ MatchesIterator mi = mis.get();
+ if (mi == null) {
+ return null;
+ }
+ return new Matches() {
+ boolean cached = true;
+ @Override
+ public MatchesIterator getMatches(String f) throws IOException {
+ if (Objects.equals(field, f) == false) {
+ return null;
+ }
+ if (cached == false) {
+ return mis.get();
+ }
+ cached = false;
+ return mi;
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return Collections.singleton(field).iterator();
+ }
+ };
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
new file mode 100644
index 0000000..b874263
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * An iterator over match positions (and optionally offsets) for a single document and field
+ *
+ * To iterate over the matches, call {@link #next()} until it returns {@code false}, retrieving
+ * positions and/or offsets after each call. You should not call the position or offset methods
+ * before {@link #next()} has been called, or after {@link #next()} has returned {@code false}.
+ *
+ * Matches are ordered by start position, and then by end position. Match intervals may overlap.
+ *
+ * @see Weight#matches(LeafReaderContext, int)
+ */
+public interface MatchesIterator {
+
+ /**
+ * Advance the iterator to the next match position
+ * @return {@code true} if matches have not been exhausted
+ */
+ boolean next() throws IOException;
+
+ /**
+ * The start position of the current match
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ int startPosition();
+
+ /**
+ * The end position of the current match
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ int endPosition();
+
+ /**
+ * The starting offset of the current match, or {@code -1} if offsets are not available
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ int startOffset() throws IOException;
+
+ /**
+ * The ending offset of the current match, or {@code -1} if offsets are not available
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ int endOffset() throws IOException;
+
+ /**
+ * The underlying term of the current match
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ BytesRef term();
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
index 3a46b96..2299776 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
@@ -25,8 +25,8 @@ import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -203,6 +203,18 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ final Terms terms = context.reader().terms(query.field);
+ if (terms == null) {
+ return null;
+ }
+ if (terms.hasPositions() == false) {
+ return super.matches(context, doc);
+ }
+ return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, query.getTermsEnum(terms)));
+ }
+
+ @Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
index 2a7c450..a364c9a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
@@ -30,8 +30,9 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.TermStates;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
@@ -160,6 +161,16 @@ public final class SynonymQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ String field = terms[0].field();
+ Terms terms = context.reader().terms(field);
+ if (terms == null || terms.hasPositions() == false) {
+ return super.matches(context, doc);
+ }
+ return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTerms(context, doc, field, Arrays.asList(SynonymQuery.this.terms)));
+ }
+
+ @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);
if (scorer != null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
index a8bf5b0..7145a83 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
@@ -220,6 +220,15 @@ public class TermInSetQuery extends Query implements Accountable {
// order to protect highlighters
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ Terms terms = context.reader().terms(field);
+ if (terms == null || terms.hasPositions() == false) {
+ return super.matches(context, doc);
+ }
+ return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, field, termData.iterator()));
+ }
+
/**
* On the given leaf context, try to either rewrite to a disjunction if
* there are few matching terms, or build a bitset containing matching docs.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
new file mode 100644
index 0000000..0516996
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A {@link MatchesIterator} over a single term's postings list
+ */
+class TermMatchesIterator implements MatchesIterator {
+
+ private int upto;
+ private int pos;
+ private final PostingsEnum pe;
+ private final BytesRef term;
+
+ /**
+ * Create a new {@link TermMatchesIterator} for the given term and postings list
+ */
+ TermMatchesIterator(BytesRef term, PostingsEnum pe) throws IOException {
+ this.pe = pe;
+ this.upto = pe.freq();
+ this.term = term;
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ if (upto-- > 0) {
+ pos = pe.nextPosition();
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public int startPosition() {
+ return pos;
+ }
+
+ @Override
+ public int endPosition() {
+ return pos;
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return pe.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return pe.endOffset();
+ }
+
+ @Override
+ public BytesRef term() {
+ return term;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
index f1f4415..b86f340 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@@ -25,10 +25,11 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
@@ -81,6 +82,24 @@ public class TermQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ TermsEnum te = getTermsEnum(context);
+ if (te == null) {
+ return null;
+ }
+ if (context.reader().terms(term.field()).hasPositions() == false) {
+ return super.matches(context, doc);
+ }
+ return Matches.forField(term.field(), () -> {
+ PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
+ if (pe.advance(doc) != doc) {
+ return null;
+ }
+ return new TermMatchesIterator(term.bytes(), pe);
+ });
+ }
+
+ @Override
public String toString() {
return "weight(" + TermQuery.this + ")";
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/java/org/apache/lucene/search/Weight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
index 7853ccf..3281b41 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -70,6 +70,35 @@ public abstract class Weight implements SegmentCacheable {
public abstract void extractTerms(Set<Term> terms);
/**
+ * Returns {@link Matches} for a specific document, or {@code null} if the document
+ * does not match the parent query
+ *
+ * A query match that contains no position information (for example, a Point or
+ * DocValues query) will return {@link Matches#MATCH_WITH_NO_TERMS}
+ *
+ * @param context the reader's context to create the {@link Matches} for
+ * @param doc the document's id relative to the given context's reader
+ */
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ Scorer scorer = scorer(context);
+ if (scorer == null) {
+ return null;
+ }
+ final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
+ if (twoPhase == null) {
+ if (scorer.iterator().advance(doc) != doc) {
+ return null;
+ }
+ }
+ else {
+ if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) {
+ return null;
+ }
+ }
+ return Matches.MATCH_WITH_NO_TERMS;
+ }
+
+ /**
* An explanation of the score computation for the named document.
*
* @param context the readers context to create the {@link Explanation} for.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java b/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
new file mode 100644
index 0000000..3b3dd32
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestMatchesIterator extends LuceneTestCase {
+
+ protected IndexSearcher searcher;
+ protected Directory directory;
+ protected IndexReader reader;
+
+ private static final String FIELD_WITH_OFFSETS = "field_offsets";
+ private static final String FIELD_NO_OFFSETS = "field_no_offsets";
+ private static final String FIELD_DOCS_ONLY = "field_docs_only";
+ private static final String FIELD_FREQS = "field_freqs";
+
+ private static final FieldType OFFSETS = new FieldType(TextField.TYPE_STORED);
+ static {
+ OFFSETS.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ }
+
+ private static final FieldType DOCS = new FieldType(TextField.TYPE_STORED);
+ static {
+ DOCS.setIndexOptions(IndexOptions.DOCS);
+ }
+
+ private static final FieldType DOCS_AND_FREQS = new FieldType(TextField.TYPE_STORED);
+ static {
+ DOCS_AND_FREQS.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ reader.close();
+ directory.close();
+ super.tearDown();
+ }
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ directory = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
+ newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
+ for (int i = 0; i < docFields.length; i++) {
+ Document doc = new Document();
+ doc.add(newField(FIELD_WITH_OFFSETS, docFields[i], OFFSETS));
+ doc.add(newField(FIELD_NO_OFFSETS, docFields[i], TextField.TYPE_STORED));
+ doc.add(newField(FIELD_DOCS_ONLY, docFields[i], DOCS));
+ doc.add(newField(FIELD_FREQS, docFields[i], DOCS_AND_FREQS));
+ doc.add(new NumericDocValuesField("id", i));
+ doc.add(newField("id", Integer.toString(i), TextField.TYPE_STORED));
+ writer.addDocument(doc);
+ }
+ writer.forceMerge(1);
+ reader = writer.getReader();
+ writer.close();
+ searcher = newSearcher(getOnlyLeafReader(reader));
+ }
+
+ protected String[] docFields = {
+ "w1 w2 w3 w4 w5",
+ "w1 w3 w2 w3 zz",
+ "w1 xx w2 yy w4",
+ "w1 w2 w1 w4 w2 w3",
+ "nothing matches this document"
+ };
+
+ void checkMatches(Query q, String field, int[][] expected) throws IOException {
+ Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
+ for (int i = 0; i < expected.length; i++) {
+ LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(expected[i][0], searcher.leafContexts));
+ int doc = expected[i][0] - ctx.docBase;
+ Matches matches = w.matches(ctx, doc);
+ if (matches == null) {
+ assertEquals(expected[i].length, 1);
+ continue;
+ }
+ MatchesIterator it = matches.getMatches(field);
+ if (expected[i].length == 1) {
+ assertNull(it);
+ return;
+ }
+ checkFieldMatches(it, expected[i]);
+ checkFieldMatches(matches.getMatches(field), expected[i]); // test multiple calls
+ }
+ }
+
+ void checkFieldMatches(MatchesIterator it, int[] expected) throws IOException {
+ int pos = 1;
+ while (it.next()) {
+ //System.out.println(expected[i][pos] + "->" + expected[i][pos + 1] + "[" + expected[i][pos + 2] + "->" + expected[i][pos + 3] + "]");
+ assertEquals(expected[pos], it.startPosition());
+ assertEquals(expected[pos + 1], it.endPosition());
+ assertEquals(expected[pos + 2], it.startOffset());
+ assertEquals(expected[pos + 3], it.endOffset());
+ pos += 4;
+ }
+ assertEquals(expected.length, pos);
+ }
+
+ void checkNoPositionsMatches(Query q, String field, boolean[] expected) throws IOException {
+ Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
+ for (int i = 0; i < expected.length; i++) {
+ LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
+ int doc = i - ctx.docBase;
+ Matches matches = w.matches(ctx, doc);
+ if (expected[i]) {
+ MatchesIterator mi = matches.getMatches(field);
+ assertNull(mi);
+ }
+ else {
+ assertNull(matches);
+ }
+ }
+ }
+
+ void checkTerms(Query q, String field, String[][] expected) throws IOException {
+ Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
+ for (int i = 0; i < expected.length; i++) {
+ LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
+ int doc = i - ctx.docBase;
+ Matches matches = w.matches(ctx, doc);
+ if (matches == null) {
+ assertEquals(expected[i].length, 0);
+ continue;
+ }
+ MatchesIterator it = matches.getMatches(field);
+ if (it == null) {
+ assertEquals(expected[i].length, 0);
+ continue;
+ }
+ int pos = 0;
+ while (it.next()) {
+ assertEquals(expected[i][pos], it.term().utf8ToString());
+ pos += 1;
+ }
+ assertEquals(expected[i].length, pos);
+ }
+ }
+
+ public void testTermQuery() throws IOException {
+ Query q = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1"));
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2 },
+ { 1, 0, 0, 0, 2 },
+ { 2, 0, 0, 0, 2 },
+ { 3, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 4 }
+ });
+ }
+
+ public void testTermQueryNoStoredOffsets() throws IOException {
+ Query q = new TermQuery(new Term(FIELD_NO_OFFSETS, "w1"));
+ checkMatches(q, FIELD_NO_OFFSETS, new int[][]{
+ { 0, 0, 0, -1, -1 },
+ { 1, 0, 0, -1, -1 },
+ { 2, 0, 0, -1, -1 },
+ { 3, 0, 0, -1, -1, 2, 2, -1, -1 },
+ { 4 }
+ });
+ checkTerms(q, FIELD_NO_OFFSETS, new String[][]{
+ { "w1" },
+ { "w1" },
+ { "w1" },
+ { "w1", "w1" },
+ {}
+ });
+ }
+
+ public void testTermQueryNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_DOCS_ONLY, FIELD_FREQS }) {
+ Query q = new TermQuery(new Term(field, "w1"));
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
+ }
+ }
+
+ public void testDisjunction() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11 },
+ { 2, 0, 0, 0, 2 },
+ { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
+ { 4 }
+ });
+ checkTerms(q, FIELD_WITH_OFFSETS, new String[][]{
+ { "w1", "w3" },
+ { "w1", "w3", "w3" },
+ { "w1" },
+ { "w1", "w1", "w3" },
+ {}
+ });
+ }
+
+ public void testDisjunctionNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_DOCS_ONLY, FIELD_FREQS }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
+ }
+ }
+
+ public void testReqOpt() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11 },
+ { 2 },
+ { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
+ { 4 }
+ });
+ }
+
+ public void testReqOptNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_DOCS_ONLY, FIELD_FREQS }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, false, true, false });
+ }
+ }
+
+ public void testMinShouldMatch() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
+ .add(new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "xx")), BooleanClause.Occur.SHOULD)
+ .setMinimumNumberShouldMatch(2)
+ .build(), BooleanClause.Occur.SHOULD)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11 },
+ { 1, 1, 1, 3, 5, 3, 3, 9, 11 },
+ { 2, 0, 0, 0, 2, 1, 1, 3, 5, 4, 4, 12, 14 },
+ { 3, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11, 5, 5, 15, 17 },
+ { 4 }
+ });
+ checkTerms(q, FIELD_WITH_OFFSETS, new String[][]{
+ { "w1", "w3", "w4" },
+ { "w3", "w3" },
+ { "w1", "xx", "w4" },
+ { "w1", "w1", "w4", "w3" },
+ {}
+ });
+ }
+
+ public void testMinShouldMatchNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
+ .add(new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "w4")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "xx")), BooleanClause.Occur.SHOULD)
+ .setMinimumNumberShouldMatch(2)
+ .build(), BooleanClause.Occur.SHOULD)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
+ }
+ }
+
+ public void testExclusion() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "zz")), BooleanClause.Occur.MUST_NOT)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 2, 2, 6, 8 },
+ { 1 },
+ { 2 },
+ { 3, 5, 5, 15, 17 },
+ { 4 }
+ });
+ }
+
+ public void testExclusionNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "zz")), BooleanClause.Occur.MUST_NOT)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, false, false, true, false });
+ }
+ }
+
+ public void testConjunction() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")), BooleanClause.Occur.MUST)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 2, 2, 6, 8, 3, 3, 9, 11 },
+ { 1 },
+ { 2 },
+ { 3, 3, 3, 9, 11, 5, 5, 15, 17 },
+ { 4 }
+ });
+ }
+
+ public void testConjunctionNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST)
+ .add(new TermQuery(new Term(field, "w4")), BooleanClause.Occur.MUST)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, false, false, true, false });
+ }
+ }
+
+ public void testWildcards() throws IOException {
+ Query q = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "x"));
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0 },
+ { 1 },
+ { 2, 1, 1, 3, 5 },
+ { 3 },
+ { 4 }
+ });
+ checkTerms(q, FIELD_WITH_OFFSETS, new String[][]{
+ {}, {}, { "xx" }, {}
+ });
+
+ Query rq = new RegexpQuery(new Term(FIELD_WITH_OFFSETS, "w[1-2]"));
+ checkMatches(rq, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 1, 1, 3, 5 },
+ { 1, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 2, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
+ { 4 }
+ });
+
+ }
+
+ public void testNoMatchWildcards() throws IOException {
+ Query nomatch = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "wibble"));
+ Matches matches = searcher.createWeight(searcher.rewrite(nomatch), ScoreMode.COMPLETE_NO_SCORES, 1)
+ .matches(searcher.leafContexts.get(0), 0);
+ assertNull(matches);
+ }
+
+ public void testWildcardsNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new PrefixQuery(new Term(field, "x"));
+ checkNoPositionsMatches(q, field, new boolean[]{ false, false, true, false, false });
+ }
+ }
+
+ public void testSynonymQuery() throws IOException {
+ Query q = new SynonymQuery(new Term(FIELD_WITH_OFFSETS, "w1"), new Term(FIELD_WITH_OFFSETS, "w2"));
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 1, 1, 3, 5 },
+ { 1, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 2, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
+ { 4 }
+ });
+ }
+
+ public void testSynonymQueryNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new SynonymQuery(new Term(field, "w1"), new Term(field, "w2"));
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
+ }
+ }
+
+ public void testMultipleFields() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("id", "1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
+ .build();
+ Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
+
+ LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(1, searcher.leafContexts));
+ Matches m = w.matches(ctx, 1 - ctx.docBase);
+ assertNotNull(m);
+ checkFieldMatches(m.getMatches("id"), new int[]{ -1, 0, 0, -1, -1 });
+ checkFieldMatches(m.getMatches(FIELD_WITH_OFFSETS), new int[]{ -1, 1, 1, 3, 5, 3, 3, 9, 11 });
+ assertNull(m.getMatches("bogus"));
+
+ Set<String> fields = new HashSet<>();
+ for (String field : m) {
+ fields.add(field);
+ }
+ assertEquals(2, fields.size());
+ assertTrue(fields.contains(FIELD_WITH_OFFSETS));
+ assertTrue(fields.contains("id"));
+ }
+
+ protected String[] doc1Fields = {
+ "w1 w2 w3 w4 w5",
+ "w1 w3 w2 w3 zz",
+ "w1 xx w2 yy w4",
+ "w1 w2 w1 w4 w2 w3"
+ };
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index 9f2bd90..3b99ccf 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -28,6 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
@@ -151,6 +152,28 @@ public class ToParentBlockJoinQuery extends Query {
}
return Explanation.noMatch("Not a match");
}
+
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ // The default implementation would delegate to the joinQuery's Weight, which
+ // matches on children. We need to match on the parent instead
+ Scorer scorer = scorer(context);
+ if (scorer == null) {
+ return null;
+ }
+ final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
+ if (twoPhase == null) {
+ if (scorer.iterator().advance(doc) != doc) {
+ return null;
+ }
+ }
+ else {
+ if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) {
+ return null;
+ }
+ }
+ return Matches.MATCH_WITH_NO_TERMS;
+ }
}
private static class ParentApproximation extends DocIdSetIterator {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
----------------------------------------------------------------------
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
index 0d39e8b..de0e6d4 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
@@ -29,6 +29,7 @@ import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterScorer;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
@@ -149,6 +150,11 @@ public final class FunctionScoreQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ return inner.matches(context, doc);
+ }
+
+ @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Explanation scoreExplanation = inner.explain(context, doc);
if (scoreExplanation.isMatch() == false) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java
index 8f82192..8d6836b 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java
@@ -137,6 +137,28 @@ public final class CoveringQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ LongValues minMatchValues = minimumNumberMatch.getValues(context, null);
+ if (minMatchValues.advanceExact(doc) == false) {
+ return null;
+ }
+ final long minimumNumberMatch = Math.max(1, minMatchValues.longValue());
+ long matchCount = 0;
+ List<Matches> subMatches = new ArrayList<>();
+ for (Weight weight : weights) {
+ Matches matches = weight.matches(context, doc);
+ if (matches != null) {
+ matchCount++;
+ subMatches.add(matches);
+ }
+ }
+ if (matchCount < minimumNumberMatch) {
+ return null;
+ }
+ return Matches.fromSubMatches(subMatches);
+ }
+
+ @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
LongValues minMatchValues = minimumNumberMatch.getValues(context, null);
if (minMatchValues.advanceExact(doc) == false) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java
new file mode 100644
index 0000000..c5c6e98
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+class AssertingMatches implements Matches {
+
+ private final Matches in;
+
+ AssertingMatches(Matches matches) {
+ this.in = matches;
+ }
+
+ @Override
+ public MatchesIterator getMatches(String field) throws IOException {
+ MatchesIterator mi = in.getMatches(field);
+ if (mi == null)
+ return null;
+ return new AssertingMatchesIterator(mi);
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return in.iterator();
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
new file mode 100644
index 0000000..52fb184
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.util.BytesRef;
+
+class AssertingMatchesIterator implements MatchesIterator {
+
+ private final MatchesIterator in;
+ private State state = State.UNPOSITIONED;
+
+ private enum State { UNPOSITIONED, ITERATING, EXHAUSTED }
+
+ AssertingMatchesIterator(MatchesIterator in) {
+ this.in = in;
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ assert state != State.EXHAUSTED : state;
+ boolean more = in.next();
+ if (more == false) {
+ state = State.EXHAUSTED;
+ }
+ else {
+ state = State.ITERATING;
+ }
+ return more;
+ }
+
+ @Override
+ public int startPosition() {
+ assert state == State.ITERATING : state;
+ return in.startPosition();
+ }
+
+ @Override
+ public int endPosition() {
+ assert state == State.ITERATING : state;
+ return in.endPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ assert state == State.ITERATING : state;
+ return in.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ assert state == State.ITERATING : state;
+ return in.endOffset();
+ }
+
+ @Override
+ public BytesRef term() {
+ assert state == State.ITERATING : state;
+ return in.term();
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
index 8e3a29f..55fda23 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
@@ -32,6 +32,14 @@ class AssertingWeight extends FilterWeight {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ Matches matches = in.matches(context, doc);
+ if (matches == null)
+ return null;
+ return new AssertingMatches(matches);
+ }
+
+ @Override
public Scorer scorer(LeafReaderContext context) throws IOException {
if (random.nextBoolean()) {
final Scorer inScorer = in.scorer(context);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java b/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
index 4452c19..a918078 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
@@ -28,7 +28,9 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.LuceneTestCase;
+import static junit.framework.Assert.assertNotNull;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
/**
@@ -56,7 +58,7 @@ public class CheckHits {
if (ignore.contains(Integer.valueOf(doc))) continue;
Explanation exp = searcher.explain(q, doc);
- Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
+ assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
exp);
Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
" doesn't indicate non-match: " + exp.toString(),
@@ -300,6 +302,16 @@ public class CheckHits {
(query, defaultFieldName, searcher, deep));
}
+
+ /**
+ * Asserts that the result of calling {@link Weight#matches(LeafReaderContext, int)}
+ * for every document matching a query returns a non-null {@link Matches}
+ * @param query the query to test
+ * @param searcher the search to test against
+ */
+ public static void checkMatches(Query query, IndexSearcher searcher) throws IOException {
+ searcher.search(query, new MatchesAsserter(query, searcher));
+ }
private static final Pattern COMPUTED_FROM_PATTERN = Pattern.compile(".*, computed as .* from:");
@@ -505,7 +517,7 @@ public class CheckHits {
("exception in hitcollector of [["+d+"]] for #"+doc, e);
}
- Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp);
+ assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp);
verifyExplanation(d,doc,scorer.score(),deep,exp);
Assert.assertTrue("Explanation of [["+d+"]] for #"+ doc +
" does not indicate match: " + exp.toString(),
@@ -522,6 +534,45 @@ public class CheckHits {
}
}
+ /**
+ * Asserts that the {@link Matches} from a query is non-null whenever
+ * the document its created for is a hit.
+ *
+ * Also checks that the previous non-matching document has a {@code null} {@link Matches}
+ */
+ public static class MatchesAsserter extends SimpleCollector {
+
+ private final Weight weight;
+ private LeafReaderContext context;
+ int lastCheckedDoc = -1;
+
+ public MatchesAsserter(Query query, IndexSearcher searcher) throws IOException {
+ this.weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1);
+ }
+
+ @Override
+ protected void doSetNextReader(LeafReaderContext context) throws IOException {
+ this.context = context;
+ this.lastCheckedDoc = -1;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ Matches matches = this.weight.matches(context, doc);
+ assertNotNull("Unexpected null Matches object in doc" + doc + " for query " + this.weight.getQuery(), matches);
+ if (lastCheckedDoc != doc - 1) {
+ assertNull("Unexpected non-null Matches object in non-matching doc" + doc + " for query " + this.weight.getQuery(),
+ this.weight.matches(context, doc - 1));
+ }
+ lastCheckedDoc = doc;
+ }
+
+ @Override
+ public ScoreMode scoreMode() {
+ return ScoreMode.COMPLETE_NO_SCORES;
+ }
+ }
+
public static void checkTopScores(Random random, Query query, IndexSearcher searcher) throws IOException {
// Check it computed the top hits correctly
doCheckTopScores(query, searcher, 1);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/040a9601/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
index 139cb9d..f68b717 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@@ -108,6 +108,7 @@ public class QueryUtils {
* @see #checkSkipTo
* @see #checkExplanations
* @see #checkEqual
+ * @see CheckHits#checkMatches(Query, IndexSearcher)
*/
public static void check(Random random, Query q1, IndexSearcher s) {
check(random, q1, s, true);
@@ -125,6 +126,7 @@ public class QueryUtils {
check(random, q1, wrapUnderlyingReader(random, s, +1), false);
}
checkExplanations(q1,s);
+ CheckHits.checkMatches(q1, s);
}
} catch (IOException e) {
throw new RuntimeException(e);
[2/3] lucene-solr:branch_7x: LUCENE-8229: Add Weight.matches() to
iterate over match positions
Posted by ro...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java b/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
index 4435268..bb71cdf 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
@@ -21,24 +21,30 @@ import java.util.Locale;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
+import java.util.regex.Pattern;
import junit.framework.Assert;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.LuceneTestCase;
+import static junit.framework.Assert.assertNotNull;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
/**
* Utility class for asserting expected hits in tests.
*/
public class CheckHits {
-
+
/**
* Some explains methods calculate their values though a slightly
* different order of operations from the actual scoring method ...
* this allows for a small amount of relative variation
*/
public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.001f;
-
+
/**
* In general we use a relative epsilon, but some tests do crazy things
* like boost documents with 0, creating tiny tiny scores where the
@@ -46,56 +52,57 @@ public class CheckHits {
* we ensure the the epsilon is always at least this big.
*/
public static float EXPLAIN_SCORE_TOLERANCE_MINIMUM = 1e-6f;
-
+
/**
* Tests that all documents up to maxDoc which are *not* in the
- * expected result set, have an explanation which indicates that
+ * expected result set, have an explanation which indicates that
* the document does not match
*/
public static void checkNoMatchExplanations(Query q, String defaultFieldName,
IndexSearcher searcher, int[] results)
- throws IOException {
+ throws IOException {
String d = q.toString(defaultFieldName);
Set<Integer> ignore = new TreeSet<>();
for (int i = 0; i < results.length; i++) {
ignore.add(Integer.valueOf(results[i]));
}
-
+
int maxDoc = searcher.getIndexReader().maxDoc();
for (int doc = 0; doc < maxDoc; doc++) {
if (ignore.contains(Integer.valueOf(doc))) continue;
Explanation exp = searcher.explain(q, doc);
- Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
- exp);
- Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
- " doesn't indicate non-match: " + exp.toString(),
- exp.isMatch());
+ assertNotNull("Explanation of [[" + d + "]] for #" + doc + " is null",
+ exp);
+ Assert.assertFalse("Explanation of [[" + d + "]] for #" + doc +
+ " doesn't indicate non-match: " + exp.toString(),
+ exp.isMatch());
}
-
+
}
-
+
/**
* Tests that a query matches the an expected set of documents using a
* HitCollector.
- *
+ * <p>
* <p>
* Note that when using the HitCollector API, documents will be collected
* if they "match" regardless of what their score is.
* </p>
- * @param query the query to test
- * @param searcher the searcher to test the query against
+ *
+ * @param query the query to test
+ * @param searcher the searcher to test the query against
* @param defaultFieldName used for displaying the query in assertion messages
- * @param results a list of documentIds that must match the query
+ * @param results a list of documentIds that must match the query
* @see #checkHits
*/
public static void checkHitCollector(Random random, Query query, String defaultFieldName,
IndexSearcher searcher, int[] results)
- throws IOException {
+ throws IOException {
+
+ QueryUtils.check(random, query, searcher);
- QueryUtils.check(random,query,searcher);
-
Set<Integer> correct = new TreeSet<>();
for (int i = 0; i < results.length; i++) {
correct.add(Integer.valueOf(results[i]));
@@ -104,17 +111,17 @@ public class CheckHits {
final Collector c = new SetCollector(actual);
searcher.search(query, c);
- Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
- correct, actual);
+ Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
+ correct, actual);
for (int i = -1; i < 2; i++) {
actual.clear();
IndexSearcher s = QueryUtils.wrapUnderlyingReader
- (random, searcher, i);
+ (random, searcher, i);
s.search(query, c);
Assert.assertEquals("Wrap Reader " + i + ": " +
- query.toString(defaultFieldName),
- correct, actual);
+ query.toString(defaultFieldName),
+ correct, actual);
}
}
@@ -123,21 +130,27 @@ public class CheckHits {
*/
public static class SetCollector extends SimpleCollector {
final Set<Integer> bag;
+
public SetCollector(Set<Integer> bag) {
this.bag = bag;
}
+
private int base = 0;
+
@Override
- public void setScorer(Scorer scorer) throws IOException {}
+ public void setScorer(Scorer scorer) throws IOException {
+ }
+
@Override
public void collect(int doc) {
bag.add(Integer.valueOf(doc + base));
}
+
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
base = context.docBase;
}
-
+
@Override
public boolean needsScores() {
return false;
@@ -146,24 +159,25 @@ public class CheckHits {
/**
* Tests that a query matches the an expected set of documents using Hits.
- *
+ * <p>
* <p>
* Note that when using the Hits API, documents will only be returned
* if they have a positive normalized score.
* </p>
- * @param query the query to test
- * @param searcher the searcher to test the query against
+ *
+ * @param query the query to test
+ * @param searcher the searcher to test the query against
* @param defaultFieldName used for displaing the query in assertion messages
- * @param results a list of documentIds that must match the query
+ * @param results a list of documentIds that must match the query
* @see #checkHitCollector
*/
public static void checkHits(
- Random random,
- Query query,
- String defaultFieldName,
- IndexSearcher searcher,
- int[] results)
- throws IOException {
+ Random random,
+ Query query,
+ String defaultFieldName,
+ IndexSearcher searcher,
+ int[] results)
+ throws IOException {
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
@@ -179,10 +193,12 @@ public class CheckHits {
Assert.assertEquals(query.toString(defaultFieldName), correct, actual);
- QueryUtils.check(random, query,searcher, LuceneTestCase.rarely(random));
+ QueryUtils.check(random, query, searcher, LuceneTestCase.rarely(random));
}
- /** Tests that a Hits has an expected order of documents */
+ /**
+ * Tests that a Hits has an expected order of documents
+ */
public static void checkDocIds(String mes, int[] results, ScoreDoc[] hits) {
Assert.assertEquals(mes + " nr of hits", hits.length, results.length);
for (int i = 0; i < results.length; i++) {
@@ -190,14 +206,15 @@ public class CheckHits {
}
}
- /** Tests that two queries have an expected order of documents,
+ /**
+ * Tests that two queries have an expected order of documents,
* and that the two queries have the same score values.
*/
public static void checkHitsQuery(
- Query query,
- ScoreDoc[] hits1,
- ScoreDoc[] hits2,
- int[] results) {
+ Query query,
+ ScoreDoc[] hits1,
+ ScoreDoc[] hits2,
+ int[] results) {
checkDocIds("hits1", results, hits1);
checkDocIds("hits2", results, hits2);
@@ -205,48 +222,47 @@ public class CheckHits {
}
public static void checkEqual(Query query, ScoreDoc[] hits1, ScoreDoc[] hits2) {
- final float scoreTolerance = 1.0e-6f;
- if (hits1.length != hits2.length) {
- Assert.fail("Unequal lengths: hits1="+hits1.length+",hits2="+hits2.length);
- }
+ final float scoreTolerance = 1.0e-6f;
+ if (hits1.length != hits2.length) {
+ Assert.fail("Unequal lengths: hits1=" + hits1.length + ",hits2=" + hits2.length);
+ }
for (int i = 0; i < hits1.length; i++) {
if (hits1[i].doc != hits2[i].doc) {
Assert.fail("Hit " + i + " docnumbers don't match\n"
- + hits2str(hits1, hits2,0,0)
- + "for query:" + query.toString());
+ + hits2str(hits1, hits2, 0, 0)
+ + "for query:" + query.toString());
}
if ((hits1[i].doc != hits2[i].doc)
- || Math.abs(hits1[i].score - hits2[i].score) > scoreTolerance)
- {
+ || Math.abs(hits1[i].score - hits2[i].score) > scoreTolerance) {
Assert.fail("Hit " + i + ", doc nrs " + hits1[i].doc + " and " + hits2[i].doc
- + "\nunequal : " + hits1[i].score
- + "\n and: " + hits2[i].score
- + "\nfor query:" + query.toString());
+ + "\nunequal : " + hits1[i].score
+ + "\n and: " + hits2[i].score
+ + "\nfor query:" + query.toString());
}
}
}
public static String hits2str(ScoreDoc[] hits1, ScoreDoc[] hits2, int start, int end) {
StringBuilder sb = new StringBuilder();
- int len1=hits1==null ? 0 : hits1.length;
- int len2=hits2==null ? 0 : hits2.length;
- if (end<=0) {
- end = Math.max(len1,len2);
+ int len1 = hits1 == null ? 0 : hits1.length;
+ int len2 = hits2 == null ? 0 : hits2.length;
+ if (end <= 0) {
+ end = Math.max(len1, len2);
}
- sb.append("Hits length1=").append(len1).append("\tlength2=").append(len2);
+ sb.append("Hits length1=").append(len1).append("\tlength2=").append(len2);
sb.append('\n');
- for (int i=start; i<end; i++) {
- sb.append("hit=").append(i).append(':');
- if (i<len1) {
- sb.append(" doc").append(hits1[i].doc).append('=').append(hits1[i].score);
+ for (int i = start; i < end; i++) {
+ sb.append("hit=").append(i).append(':');
+ if (i < len1) {
+ sb.append(" doc").append(hits1[i].doc).append('=').append(hits1[i].score);
} else {
sb.append(" ");
}
sb.append(",\t");
- if (i<len2) {
+ if (i < len2) {
sb.append(" doc").append(hits2[i].doc).append('=').append(hits2[i].score);
}
sb.append('\n');
@@ -257,10 +273,10 @@ public class CheckHits {
public static String topdocsString(TopDocs docs, int start, int end) {
StringBuilder sb = new StringBuilder();
- sb.append("TopDocs totalHits=").append(docs.totalHits).append(" top=").append(docs.scoreDocs.length).append('\n');
- if (end<=0) end=docs.scoreDocs.length;
- else end=Math.min(end,docs.scoreDocs.length);
- for (int i=start; i<end; i++) {
+ sb.append("TopDocs totalHits=").append(docs.totalHits).append(" top=").append(docs.scoreDocs.length).append('\n');
+ if (end <= 0) end = docs.scoreDocs.length;
+ else end = Math.min(end, docs.scoreDocs.length);
+ for (int i = start; i < end; i++) {
sb.append('\t');
sb.append(i);
sb.append(") doc=");
@@ -274,15 +290,14 @@ public class CheckHits {
/**
* Asserts that the explanation value for every document matching a
- * query corresponds with the true score.
+ * query corresponds with the true score.
*
+ * @param query the query to test
+ * @param searcher the searcher to test the query against
+ * @param defaultFieldName used for displaing the query in assertion messages
* @see ExplanationAsserter
* @see #checkExplanations(Query, String, IndexSearcher, boolean) for a
* "deep" testing of the explanation details.
- *
- * @param query the query to test
- * @param searcher the searcher to test the query against
- * @param defaultFieldName used for displaing the query in assertion messages
*/
public static void checkExplanations(Query query,
String defaultFieldName,
@@ -292,51 +307,64 @@ public class CheckHits {
/**
* Asserts that the explanation value for every document matching a
- * query corresponds with the true score. Optionally does "deep"
+ * query corresponds with the true score. Optionally does "deep"
* testing of the explanation details.
*
- * @see ExplanationAsserter
- * @param query the query to test
- * @param searcher the searcher to test the query against
+ * @param query the query to test
+ * @param searcher the searcher to test the query against
* @param defaultFieldName used for displaing the query in assertion messages
- * @param deep indicates whether a deep comparison of sub-Explanation details should be executed
+ * @param deep indicates whether a deep comparison of sub-Explanation details should be executed
+ * @see ExplanationAsserter
*/
public static void checkExplanations(Query query,
String defaultFieldName,
- IndexSearcher searcher,
+ IndexSearcher searcher,
boolean deep) throws IOException {
searcher.search(query,
- new ExplanationAsserter
- (query, defaultFieldName, searcher, deep));
+ new ExplanationAsserter
+ (query, defaultFieldName, searcher, deep));
}
- /** returns a reasonable epsilon for comparing two floats,
- * where minor differences are acceptable such as score vs. explain */
+ /**
+ * returns a reasonable epsilon for comparing two floats,
+ * where minor differences are acceptable such as score vs. explain
+ */
public static float explainToleranceDelta(float f1, float f2) {
return Math.max(EXPLAIN_SCORE_TOLERANCE_MINIMUM, Math.max(Math.abs(f1), Math.abs(f2)) * EXPLAIN_SCORE_TOLERANCE_DELTA);
}
- /**
+ /**
+ * Asserts that the result of calling {@link Weight#matches(LeafReaderContext, int)}
+ * for every document matching a query returns a non-null {@link Matches}
+ *
+ * @param query the query to test
+ * @param searcher the search to test against
+ */
+ public static void checkMatches(Query query, IndexSearcher searcher) throws IOException {
+ searcher.search(query, new MatchesAsserter(query, searcher));
+ }
+
+ /**
* Assert that an explanation has the expected score, and optionally that its
* sub-details max/sum/factor match to that score.
*
- * @param q String representation of the query for assertion messages
- * @param doc Document ID for assertion messages
+ * @param q String representation of the query for assertion messages
+ * @param doc Document ID for assertion messages
* @param score Real score value of doc with query q
- * @param deep indicates whether a deep comparison of sub-Explanation details should be executed
- * @param expl The Explanation to match against score
+ * @param deep indicates whether a deep comparison of sub-Explanation details should be executed
+ * @param expl The Explanation to match against score
*/
- public static void verifyExplanation(String q,
- int doc,
+ public static void verifyExplanation(String q,
+ int doc,
float score,
boolean deep,
Explanation expl) {
float value = expl.getValue();
- Assert.assertEquals(q+": score(doc="+doc+")="+score+
- " != explanationScore="+value+" Explanation: "+expl,
- score,value,explainToleranceDelta(score, value));
+ Assert.assertEquals(q + ": score(doc=" + doc + ")=" + score +
+ " != explanationScore=" + value + " Explanation: " + expl,
+ score, value, explainToleranceDelta(score, value));
if (!deep) return;
@@ -350,14 +378,14 @@ public class CheckHits {
Assert.assertTrue("Child doc explanations are missing", detail.length > 0);
}
if (detail.length > 0) {
- if (detail.length==1) {
+ if (detail.length == 1) {
// simple containment, unless it's a freq of: (which lets a query explain how the freq is calculated),
// just verify contained expl has same score
if (expl.getDescription().endsWith("with freq of:") == false
// with dismax, even if there is a single sub explanation, its
// score might be different if the score is negative
&& (score >= 0 || expl.getDescription().endsWith("times others of:") == false)) {
- verifyExplanation(q,doc,score,deep,detail[0]);
+ verifyExplanation(q, doc, score, deep, detail[0]);
}
} else {
// explanation must either:
@@ -372,11 +400,11 @@ public class CheckHits {
if (!(productOf || sumOf || maxOf || computedOf)) {
// maybe 'max plus x times others'
int k1 = descr.indexOf("max plus ");
- if (k1>=0) {
+ if (k1 >= 0) {
k1 += "max plus ".length();
- int k2 = descr.indexOf(" ",k1);
+ int k2 = descr.indexOf(" ", k1);
try {
- x = Float.parseFloat(descr.substring(k1,k2).trim());
+ x = Float.parseFloat(descr.substring(k1, k2).trim());
if (descr.substring(k2).trim().equals("times others of:")) {
maxTimesOthers = true;
}
@@ -386,19 +414,19 @@ public class CheckHits {
}
// TODO: this is a TERRIBLE assertion!!!!
Assert.assertTrue(
- q+": multi valued explanation description=\""+descr
- +"\" must be 'max of plus x times others', 'computed as x from:' or end with 'product of'"
- +" or 'sum of:' or 'max of:' - "+expl,
+ q + ": multi valued explanation description=\"" + descr
+ + "\" must be 'max of plus x times others', 'computed as x from:' or end with 'product of'"
+ + " or 'sum of:' or 'max of:' - " + expl,
productOf || sumOf || maxOf || computedOf || maxTimesOthers);
float sum = 0;
float product = 1;
float max = 0;
- for (int i=0; i<detail.length; i++) {
+ for (int i = 0; i < detail.length; i++) {
float dval = detail[i].getValue();
- verifyExplanation(q,doc,dval,deep,detail[i]);
+ verifyExplanation(q, doc, dval, deep, detail[i]);
product *= dval;
sum += dval;
- max = Math.max(max,dval);
+ max = Math.max(max, dval);
}
float combined = 0;
if (productOf) {
@@ -413,9 +441,9 @@ public class CheckHits {
Assert.assertTrue("should never get here!", computedOf);
combined = value;
}
- Assert.assertEquals(q+": actual subDetails combined=="+combined+
- " != value="+value+" Explanation: "+expl,
- combined,value,explainToleranceDelta(combined, value));
+ Assert.assertEquals(q + ": actual subDetails combined==" + combined +
+ " != value=" + value + " Explanation: " + expl,
+ combined, value, explainToleranceDelta(combined, value));
}
}
}
@@ -430,24 +458,28 @@ public class CheckHits {
public ExplanationAssertingSearcher(IndexReader r) {
super(r);
}
+
protected void checkExplanations(Query q) throws IOException {
super.search(q,
- new ExplanationAsserter
- (q, null, this));
+ new ExplanationAsserter
+ (q, null, this));
}
+
@Override
public TopFieldDocs search(Query query,
int n,
Sort sort) throws IOException {
-
+
checkExplanations(query);
- return super.search(query,n,sort);
+ return super.search(query, n, sort);
}
+
@Override
public void search(Query query, Collector results) throws IOException {
checkExplanations(query);
super.search(query, results);
}
+
@Override
public TopDocs search(Query query, int n) throws IOException {
@@ -455,11 +487,11 @@ public class CheckHits {
return super.search(query, n);
}
}
-
+
/**
* Asserts that the score explanation for every document matching a
* query corresponds with the true score.
- *
+ * <p>
* NOTE: this HitCollector should only be used with the Query and Searcher
* specified at when it is constructed.
*
@@ -471,26 +503,29 @@ public class CheckHits {
IndexSearcher s;
String d;
boolean deep;
-
+
Scorer scorer;
private int base = 0;
- /** Constructs an instance which does shallow tests on the Explanation */
+ /**
+ * Constructs an instance which does shallow tests on the Explanation
+ */
public ExplanationAsserter(Query q, String defaultFieldName, IndexSearcher s) {
- this(q,defaultFieldName,s,false);
- }
+ this(q, defaultFieldName, s, false);
+ }
+
public ExplanationAsserter(Query q, String defaultFieldName, IndexSearcher s, boolean deep) {
- this.q=q;
- this.s=s;
+ this.q = q;
+ this.s = s;
this.d = q.toString(defaultFieldName);
- this.deep=deep;
- }
-
+ this.deep = deep;
+ }
+
@Override
public void setScorer(Scorer scorer) throws IOException {
- this.scorer = scorer;
+ this.scorer = scorer;
}
-
+
@Override
public void collect(int doc) throws IOException {
Explanation exp = null;
@@ -499,26 +534,65 @@ public class CheckHits {
exp = s.explain(q, doc);
} catch (IOException e) {
throw new RuntimeException
- ("exception in hitcollector of [["+d+"]] for #"+doc, e);
+ ("exception in hitcollector of [[" + d + "]] for #" + doc, e);
}
-
- Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp);
- verifyExplanation(d,doc,scorer.score(),deep,exp);
- Assert.assertTrue("Explanation of [["+d+"]] for #"+ doc +
- " does not indicate match: " + exp.toString(),
- exp.isMatch());
+
+ assertNotNull("Explanation of [[" + d + "]] for #" + doc + " is null", exp);
+ verifyExplanation(d, doc, scorer.score(), deep, exp);
+ Assert.assertTrue("Explanation of [[" + d + "]] for #" + doc +
+ " does not indicate match: " + exp.toString(),
+ exp.isMatch());
}
+
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
base = context.docBase;
}
-
+
@Override
public boolean needsScores() {
return true;
}
}
+ /**
+ * Asserts that the {@link Matches} from a query is non-null whenever
+ * the document its created for is a hit.
+ * <p>
+ * Also checks that the previous non-matching document has a {@code null} {@link Matches}
+ */
+ public static class MatchesAsserter extends SimpleCollector {
+
+ private final Weight weight;
+ private LeafReaderContext context;
+ int lastCheckedDoc = -1;
+
+ public MatchesAsserter(Query query, IndexSearcher searcher) throws IOException {
+ this.weight = searcher.createWeight(searcher.rewrite(query), false, 1);
+ }
+
+ @Override
+ protected void doSetNextReader(LeafReaderContext context) throws IOException {
+ this.context = context;
+ this.lastCheckedDoc = -1;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ Matches matches = this.weight.matches(context, doc);
+ assertNotNull("Unexpected null Matches object in doc" + doc + " for query " + this.weight.getQuery(), matches);
+ if (lastCheckedDoc != doc - 1) {
+ assertNull("Unexpected non-null Matches object in non-matching doc" + doc + " for query " + this.weight.getQuery(),
+ this.weight.matches(context, doc - 1));
+ }
+ lastCheckedDoc = doc;
+ }
+
+ @Override
+ public boolean needsScores() {
+ return false;
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
index 99f0508..6a18080 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@@ -108,6 +108,7 @@ public class QueryUtils {
* @see #checkSkipTo
* @see #checkExplanations
* @see #checkEqual
+ * @see CheckHits#checkMatches(Query, IndexSearcher)
*/
public static void check(Random random, Query q1, IndexSearcher s) {
check(random, q1, s, true);
@@ -125,6 +126,7 @@ public class QueryUtils {
check(random, q1, wrapUnderlyingReader(random, s, +1), false);
}
checkExplanations(q1,s);
+ CheckHits.checkMatches(q1, s);
}
} catch (IOException e) {
throw new RuntimeException(e);
[3/3] lucene-solr:branch_7x: LUCENE-8229: Add Weight.matches() to
iterate over match positions
Posted by ro...@apache.org.
LUCENE-8229: Add Weight.matches() to iterate over match positions
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/502fd4bf
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/502fd4bf
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/502fd4bf
Branch: refs/heads/branch_7x
Commit: 502fd4bf12b8860b8eea504a96ad1b49dd52938c
Parents: 2376b6c
Author: Alan Woodward <ro...@apache.org>
Authored: Wed Apr 11 09:43:27 2018 +0100
Committer: Alan Woodward <ro...@apache.org>
Committed: Wed Apr 11 09:51:07 2018 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 5 +
.../org/apache/lucene/search/BooleanWeight.java | 35 ++
.../lucene/search/ConstantScoreQuery.java | 5 +
.../search/DisjunctionMatchesIterator.java | 171 +++++++
.../lucene/search/DisjunctionMaxQuery.java | 12 +
.../lucene/search/DocValuesRewriteMethod.java | 20 +-
.../org/apache/lucene/search/FilterWeight.java | 4 +
.../lucene/search/IndexOrDocValuesQuery.java | 6 +
.../org/apache/lucene/search/LRUQueryCache.java | 5 +
.../java/org/apache/lucene/search/Matches.java | 146 ++++++
.../apache/lucene/search/MatchesIterator.java | 79 ++++
.../MultiTermQueryConstantScoreWrapper.java | 12 +
.../org/apache/lucene/search/SynonymQuery.java | 11 +
.../apache/lucene/search/TermInSetQuery.java | 9 +
.../lucene/search/TermMatchesIterator.java | 77 ++++
.../org/apache/lucene/search/TermQuery.java | 18 +
.../java/org/apache/lucene/search/Weight.java | 29 ++
.../lucene/search/TestMatchesIterator.java | 440 +++++++++++++++++++
.../search/join/ToParentBlockJoinQuery.java | 23 +
.../queries/function/FunctionScoreQuery.java | 6 +
.../org/apache/lucene/search/CoveringQuery.java | 22 +
.../apache/lucene/search/AssertingMatches.java | 43 ++
.../lucene/search/AssertingMatchesIterator.java | 77 ++++
.../apache/lucene/search/AssertingWeight.java | 8 +
.../org/apache/lucene/search/CheckHits.java | 340 ++++++++------
.../org/apache/lucene/search/QueryUtils.java | 2 +
26 files changed, 1467 insertions(+), 138 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7e47911..0c7026b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -41,6 +41,11 @@ New Features
soft deletes if the reader is opened form a directory. (Simon Willnauer,
Mike McCandless, Uwe Schindler, Adrien Grand)
+* LUCENE-8229: Add a method Weight.matches(LeafReaderContext, doc) that returns
+ an iterator over matching positions for a given query and document. This
+ allows exact hit extraction and will enable implementation of accurate
+ highlighters. (Alan Woodward, Adrien Grand, David Smiley)
+
Bug Fixes
* LUCENE-8234: Fixed bug in how spatial relationship is computed for
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
index dbe3d17..96bfdac 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
@@ -114,6 +114,41 @@ final class BooleanWeight extends Weight {
}
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ final int minShouldMatch = query.getMinimumNumberShouldMatch();
+ List<Matches> matches = new ArrayList<>();
+ int shouldMatchCount = 0;
+ Iterator<Weight> wIt = weights.iterator();
+ Iterator<BooleanClause> cIt = query.clauses().iterator();
+ while (wIt.hasNext()) {
+ Weight w = wIt.next();
+ BooleanClause bc = cIt.next();
+ Matches m = w.matches(context, doc);
+ if (bc.isProhibited()) {
+ if (m != null) {
+ return null;
+ }
+ }
+ if (bc.isRequired()) {
+ if (m == null) {
+ return null;
+ }
+ matches.add(m);
+ }
+ if (bc.getOccur() == Occur.SHOULD) {
+ if (m != null) {
+ matches.add(m);
+ shouldMatchCount++;
+ }
+ }
+ }
+ if (shouldMatchCount < minShouldMatch) {
+ return null;
+ }
+ return Matches.fromSubMatches(matches);
+ }
+
static BulkScorer disableScoring(final BulkScorer scorer) {
return new BulkScorer() {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
index 90cc5b4..3ee4158 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
@@ -151,6 +151,11 @@ public final class ConstantScoreQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ return innerWeight.matches(context, doc);
+ }
+
+ @Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
new file mode 100644
index 0000000..37770d2
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * A {@link MatchesIterator} that combines matches from a set of sub-iterators
+ *
+ * Matches are sorted by their start positions, and then by their end positions, so that
+ * prefixes sort first. Matches may overlap, or be duplicated if they appear in more
+ * than one of the sub-iterators.
+ */
+final class DisjunctionMatchesIterator implements MatchesIterator {
+
+ /**
+ * Create a {@link DisjunctionMatchesIterator} over a list of terms
+ *
+ * Only terms that have at least one match in the given document will be included
+ */
+ static MatchesIterator fromTerms(LeafReaderContext context, int doc, String field, List<Term> terms) throws IOException {
+ Objects.requireNonNull(field);
+ for (Term term : terms) {
+ if (Objects.equals(field, term.field()) == false) {
+ throw new IllegalArgumentException("Tried to generate iterator from terms in multiple fields: expected [" + field + "] but got [" + term.field() + "]");
+ }
+ }
+ return fromTermsEnum(context, doc, field, asBytesRefIterator(terms));
+ }
+
+ private static BytesRefIterator asBytesRefIterator(List<Term> terms) {
+ return new BytesRefIterator() {
+ int i = 0;
+ @Override
+ public BytesRef next() {
+ if (i >= terms.size())
+ return null;
+ return terms.get(i++).bytes();
+ }
+ };
+ }
+
+ /**
+ * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
+ *
+ * Only terms that have at least one match in the given document will be included
+ */
+ static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, String field, BytesRefIterator terms) throws IOException {
+ Objects.requireNonNull(field);
+ List<MatchesIterator> mis = new ArrayList<>();
+ Terms t = context.reader().terms(field);
+ if (t == null)
+ return null;
+ TermsEnum te = t.iterator();
+ PostingsEnum reuse = null;
+ for (BytesRef term = terms.next(); term != null; term = terms.next()) {
+ if (te.seekExact(term)) {
+ PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
+ if (pe.advance(doc) == doc) {
+ // TODO do we want to use the copied term here, or instead create a label that associates all of the TMIs with a single term?
+ mis.add(new TermMatchesIterator(BytesRef.deepCopyOf(term), pe));
+ reuse = null;
+ }
+ else {
+ reuse = pe;
+ }
+ }
+ }
+ if (mis.size() == 0)
+ return null;
+ if (mis.size() == 1)
+ return mis.get(0);
+ return new DisjunctionMatchesIterator(mis);
+ }
+
+ static MatchesIterator fromSubIterators(List<MatchesIterator> mis) throws IOException {
+ if (mis.size() == 0)
+ return null;
+ if (mis.size() == 1)
+ return mis.get(0);
+ return new DisjunctionMatchesIterator(mis);
+ }
+
+ private final PriorityQueue<MatchesIterator> queue;
+
+ private boolean started = false;
+
+ private DisjunctionMatchesIterator(List<MatchesIterator> matches) throws IOException {
+ queue = new PriorityQueue<MatchesIterator>(matches.size()){
+ @Override
+ protected boolean lessThan(MatchesIterator a, MatchesIterator b) {
+ return a.startPosition() < b.startPosition() ||
+ (a.startPosition() == b.startPosition() && a.endPosition() < b.endPosition()) ||
+ (a.startPosition() == b.startPosition() && a.endPosition() == b.endPosition() && a.term().compareTo(b.term()) < 0);
+ }
+ };
+ for (MatchesIterator mi : matches) {
+ if (mi.next()) {
+ queue.add(mi);
+ }
+ }
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ if (started == false) {
+ return started = true;
+ }
+ if (queue.top().next() == false) {
+ queue.pop();
+ }
+ if (queue.size() > 0) {
+ queue.updateTop();
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public int startPosition() {
+ return queue.top().startPosition();
+ }
+
+ @Override
+ public int endPosition() {
+ return queue.top().endPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return queue.top().startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return queue.top().endOffset();
+ }
+
+ @Override
+ public BytesRef term() {
+ return queue.top().term();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
index 13237a2..29a57e3 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
@@ -115,6 +115,18 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
}
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ List<Matches> mis = new ArrayList<>();
+ for (Weight weight : weights) {
+ Matches mi = weight.matches(context, doc);
+ if (mi != null) {
+ mis.add(mi);
+ }
+ }
+ return Matches.fromSubMatches(mis);
+ }
+
/** Create the scorer used to score our associated DisjunctionMaxQuery */
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
index 602864d..7034328 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
@@ -74,11 +74,16 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
+
@Override
- public Scorer scorer(LeafReaderContext context) throws IOException {
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
- TermsEnum termsEnum = query.getTermsEnum(new Terms() {
-
+ return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, getTermsEnum(fcsi)));
+ }
+
+ private TermsEnum getTermsEnum(SortedSetDocValues fcsi) throws IOException {
+ return query.getTermsEnum(new Terms() {
+
@Override
public TermsEnum iterator() throws IOException {
return fcsi.termsEnum();
@@ -118,13 +123,18 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
public boolean hasPositions() {
return false;
}
-
+
@Override
public boolean hasPayloads() {
return false;
}
});
-
+ }
+
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
+ TermsEnum termsEnum = getTermsEnum(fcsi);
assert termsEnum != null;
if (termsEnum.next() == null) {
// no matching terms
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
index 380ad40..5921915 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
@@ -75,4 +75,8 @@ public abstract class FilterWeight extends Weight {
return in.scorer(context);
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ return in.matches(context, doc);
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
index 06fb9c2..944f7d3 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java
@@ -120,6 +120,12 @@ public final class IndexOrDocValuesQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ // We need to check a single doc, so the dv query should perform better
+ return dvWeight.matches(context, doc);
+ }
+
+ @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
// We need to check a single doc, so the dv query should perform better
return dvWeight.explain(context, doc);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
index 5c828d6..fcd84d0 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -676,6 +676,11 @@ public class LRUQueryCache implements QueryCache, Accountable {
in.extractTerms(terms);
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ return in.matches(context, doc);
+ }
+
private boolean cacheEntryHasReasonableWorstCaseSize(int maxDoc) {
// The worst-case (dense) is a bit set which needs one bit per document
final long worstCaseRamUsage = maxDoc / 8;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/Matches.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/Matches.java b/lucene/core/src/java/org/apache/lucene/search/Matches.java
new file mode 100644
index 0000000..3670563
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/Matches.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Reports the positions and optionally offsets of all matching terms in a query
+ * for a single document
+ *
+ * To obtain a {@link MatchesIterator} for a particular field, call {@link #getMatches(String)}.
+ * Note that you can call {@link #getMatches(String)} multiple times to retrieve new
+ * iterators, but it is not thread-safe.
+ */
+public interface Matches extends Iterable<String> {
+
+ /**
+ * Returns a {@link MatchesIterator} over the matches for a single field,
+ * or {@code null} if there are no matches in that field.
+ */
+ MatchesIterator getMatches(String field) throws IOException;
+
+ /**
+ * Indicates a match with no term positions, for example on a Point or DocValues field,
+ * or a field indexed as docs and freqs only
+ */
+ Matches MATCH_WITH_NO_TERMS = new Matches() {
+ @Override
+ public Iterator<String> iterator() {
+ return Collections.emptyIterator();
+ }
+
+ @Override
+ public MatchesIterator getMatches(String field) {
+ return null;
+ }
+ };
+
+ /**
+ * Amalgamate a collection of {@link Matches} into a single object
+ */
+ static Matches fromSubMatches(List<Matches> subMatches) {
+ if (subMatches == null || subMatches.size() == 0) {
+ return null;
+ }
+ List<Matches> sm = subMatches.stream().filter(m -> m != MATCH_WITH_NO_TERMS).collect(Collectors.toList());
+ if (sm.size() == 0) {
+ return MATCH_WITH_NO_TERMS;
+ }
+ if (sm.size() == 1) {
+ return sm.get(0);
+ }
+ Set<String> fields = new HashSet<>();
+ for (Matches m : sm) {
+ for (String field : m) {
+ fields.add(field);
+ }
+ }
+ return new Matches() {
+ @Override
+ public MatchesIterator getMatches(String field) throws IOException {
+ List<MatchesIterator> subIterators = new ArrayList<>();
+ for (Matches m : sm) {
+ MatchesIterator it = m.getMatches(field);
+ if (it != null) {
+ subIterators.add(it);
+ }
+ }
+ return DisjunctionMatchesIterator.fromSubIterators(subIterators);
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return fields.iterator();
+ }
+ };
+ }
+
+ /**
+ * A functional interface that supplies a {@link MatchesIterator}
+ */
+ @FunctionalInterface
+ interface MatchesIteratorSupplier {
+ /** Return a new {@link MatchesIterator} */
+ MatchesIterator get() throws IOException;
+ }
+
+ /**
+ * Create a Matches for a single field
+ */
+ static Matches forField(String field, MatchesIteratorSupplier mis) throws IOException {
+
+ // The indirection here, using a Supplier object rather than a MatchesIterator
+ // directly, is to allow for multiple calls to Matches.getMatches() to return
+ // new iterators. We still need to call MatchesIteratorSupplier.get() eagerly
+ // to work out if we have a hit or not.
+
+ MatchesIterator mi = mis.get();
+ if (mi == null) {
+ return null;
+ }
+ return new Matches() {
+ boolean cached = true;
+ @Override
+ public MatchesIterator getMatches(String f) throws IOException {
+ if (Objects.equals(field, f) == false) {
+ return null;
+ }
+ if (cached == false) {
+ return mis.get();
+ }
+ cached = false;
+ return mi;
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return Collections.singleton(field).iterator();
+ }
+ };
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
new file mode 100644
index 0000000..b874263
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * An iterator over match positions (and optionally offsets) for a single document and field
+ *
+ * To iterate over the matches, call {@link #next()} until it returns {@code false}, retrieving
+ * positions and/or offsets after each call. You should not call the position or offset methods
+ * before {@link #next()} has been called, or after {@link #next()} has returned {@code false}.
+ *
+ * Matches are ordered by start position, and then by end position. Match intervals may overlap.
+ *
+ * @see Weight#matches(LeafReaderContext, int)
+ */
+public interface MatchesIterator {
+
+ /**
+ * Advance the iterator to the next match position
+ * @return {@code true} if matches have not been exhausted
+ */
+ boolean next() throws IOException;
+
+ /**
+ * The start position of the current match
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ int startPosition();
+
+ /**
+ * The end position of the current match
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ int endPosition();
+
+ /**
+ * The starting offset of the current match, or {@code -1} if offsets are not available
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ int startOffset() throws IOException;
+
+ /**
+ * The ending offset of the current match, or {@code -1} if offsets are not available
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ int endOffset() throws IOException;
+
+ /**
+ * The underlying term of the current match
+ *
+ * Should only be called after {@link #next()} has returned {@code true}
+ */
+ BytesRef term();
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
index 705946f..161d96d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
@@ -203,6 +203,18 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ final Terms terms = context.reader().terms(query.field);
+ if (terms == null) {
+ return null;
+ }
+ if (terms.hasPositions() == false) {
+ return super.matches(context, doc);
+ }
+ return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, query.getTermsEnum(terms)));
+ }
+
+ @Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
index ef01308..554633d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
@@ -31,6 +31,7 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
@@ -157,6 +158,16 @@ public final class SynonymQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ String field = terms[0].field();
+ Terms terms = context.reader().terms(field);
+ if (terms == null || terms.hasPositions() == false) {
+ return super.matches(context, doc);
+ }
+ return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTerms(context, doc, field, Arrays.asList(SynonymQuery.this.terms)));
+ }
+
+ @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);
if (scorer != null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
index e65efe7..1f61f7f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
@@ -220,6 +220,15 @@ public class TermInSetQuery extends Query implements Accountable {
// order to protect highlighters
}
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ Terms terms = context.reader().terms(field);
+ if (terms == null || terms.hasPositions() == false) {
+ return super.matches(context, doc);
+ }
+ return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, field, termData.iterator()));
+ }
+
/**
* On the given leaf context, try to either rewrite to a disjunction if
* there are few matching terms, or build a bitset containing matching docs.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
new file mode 100644
index 0000000..0516996
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/TermMatchesIterator.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A {@link MatchesIterator} over a single term's postings list
+ */
+class TermMatchesIterator implements MatchesIterator {
+
+ private int upto;
+ private int pos;
+ private final PostingsEnum pe;
+ private final BytesRef term;
+
+ /**
+ * Create a new {@link TermMatchesIterator} for the given term and postings list
+ */
+ TermMatchesIterator(BytesRef term, PostingsEnum pe) throws IOException {
+ this.pe = pe;
+ this.upto = pe.freq();
+ this.term = term;
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ if (upto-- > 0) {
+ pos = pe.nextPosition();
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public int startPosition() {
+ return pos;
+ }
+
+ @Override
+ public int endPosition() {
+ return pos;
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return pe.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return pe.endOffset();
+ }
+
+ @Override
+ public BytesRef term() {
+ return term;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
index ab9e2be..8e635f2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@@ -80,6 +80,24 @@ public class TermQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ TermsEnum te = getTermsEnum(context);
+ if (te == null) {
+ return null;
+ }
+ if (context.reader().terms(term.field()).hasPositions() == false) {
+ return super.matches(context, doc);
+ }
+ return Matches.forField(term.field(), () -> {
+ PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
+ if (pe.advance(doc) != doc) {
+ return null;
+ }
+ return new TermMatchesIterator(term.bytes(), pe);
+ });
+ }
+
+ @Override
public String toString() {
return "weight(" + TermQuery.this + ")";
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/java/org/apache/lucene/search/Weight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
index 3892d4f..13d058c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -70,6 +70,35 @@ public abstract class Weight implements SegmentCacheable {
public abstract void extractTerms(Set<Term> terms);
/**
+ * Returns {@link Matches} for a specific document, or {@code null} if the document
+ * does not match the parent query
+ *
+ * A query match that contains no position information (for example, a Point or
+ * DocValues query) will return {@link Matches#MATCH_WITH_NO_TERMS}
+ *
+ * @param context the reader's context to create the {@link Matches} for
+ * @param doc the document's id relative to the given context's reader
+ */
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ Scorer scorer = scorer(context);
+ if (scorer == null) {
+ return null;
+ }
+ final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
+ if (twoPhase == null) {
+ if (scorer.iterator().advance(doc) != doc) {
+ return null;
+ }
+ }
+ else {
+ if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) {
+ return null;
+ }
+ }
+ return Matches.MATCH_WITH_NO_TERMS;
+ }
+
+ /**
* An explanation of the score computation for the named document.
*
* @param context the readers context to create the {@link Explanation} for.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java b/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
new file mode 100644
index 0000000..f617957
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMatchesIterator.java
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestMatchesIterator extends LuceneTestCase {
+
+ protected IndexSearcher searcher;
+ protected Directory directory;
+ protected IndexReader reader;
+
+ private static final String FIELD_WITH_OFFSETS = "field_offsets";
+ private static final String FIELD_NO_OFFSETS = "field_no_offsets";
+ private static final String FIELD_DOCS_ONLY = "field_docs_only";
+ private static final String FIELD_FREQS = "field_freqs";
+
+ private static final FieldType OFFSETS = new FieldType(TextField.TYPE_STORED);
+ static {
+ OFFSETS.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ }
+
+ private static final FieldType DOCS = new FieldType(TextField.TYPE_STORED);
+ static {
+ DOCS.setIndexOptions(IndexOptions.DOCS);
+ }
+
+ private static final FieldType DOCS_AND_FREQS = new FieldType(TextField.TYPE_STORED);
+ static {
+ DOCS_AND_FREQS.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ reader.close();
+ directory.close();
+ super.tearDown();
+ }
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ directory = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
+ newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
+ for (int i = 0; i < docFields.length; i++) {
+ Document doc = new Document();
+ doc.add(newField(FIELD_WITH_OFFSETS, docFields[i], OFFSETS));
+ doc.add(newField(FIELD_NO_OFFSETS, docFields[i], TextField.TYPE_STORED));
+ doc.add(newField(FIELD_DOCS_ONLY, docFields[i], DOCS));
+ doc.add(newField(FIELD_FREQS, docFields[i], DOCS_AND_FREQS));
+ doc.add(new NumericDocValuesField("id", i));
+ doc.add(newField("id", Integer.toString(i), TextField.TYPE_STORED));
+ writer.addDocument(doc);
+ }
+ writer.forceMerge(1);
+ reader = writer.getReader();
+ writer.close();
+ searcher = newSearcher(getOnlyLeafReader(reader));
+ }
+
+ protected String[] docFields = {
+ "w1 w2 w3 w4 w5",
+ "w1 w3 w2 w3 zz",
+ "w1 xx w2 yy w4",
+ "w1 w2 w1 w4 w2 w3",
+ "nothing matches this document"
+ };
+
+ void checkMatches(Query q, String field, int[][] expected) throws IOException {
+ Weight w = searcher.createWeight(searcher.rewrite(q), false, 1);
+ for (int i = 0; i < expected.length; i++) {
+ LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(expected[i][0], searcher.leafContexts));
+ int doc = expected[i][0] - ctx.docBase;
+ Matches matches = w.matches(ctx, doc);
+ if (matches == null) {
+ assertEquals(expected[i].length, 1);
+ continue;
+ }
+ MatchesIterator it = matches.getMatches(field);
+ if (expected[i].length == 1) {
+ assertNull(it);
+ return;
+ }
+ checkFieldMatches(it, expected[i]);
+ checkFieldMatches(matches.getMatches(field), expected[i]); // test multiple calls
+ }
+ }
+
+ void checkFieldMatches(MatchesIterator it, int[] expected) throws IOException {
+ int pos = 1;
+ while (it.next()) {
+ //System.out.println(expected[i][pos] + "->" + expected[i][pos + 1] + "[" + expected[i][pos + 2] + "->" + expected[i][pos + 3] + "]");
+ assertEquals(expected[pos], it.startPosition());
+ assertEquals(expected[pos + 1], it.endPosition());
+ assertEquals(expected[pos + 2], it.startOffset());
+ assertEquals(expected[pos + 3], it.endOffset());
+ pos += 4;
+ }
+ assertEquals(expected.length, pos);
+ }
+
+ void checkNoPositionsMatches(Query q, String field, boolean[] expected) throws IOException {
+ Weight w = searcher.createWeight(searcher.rewrite(q), false, 1);
+ for (int i = 0; i < expected.length; i++) {
+ LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
+ int doc = i - ctx.docBase;
+ Matches matches = w.matches(ctx, doc);
+ if (expected[i]) {
+ MatchesIterator mi = matches.getMatches(field);
+ assertNull(mi);
+ }
+ else {
+ assertNull(matches);
+ }
+ }
+ }
+
+ void checkTerms(Query q, String field, String[][] expected) throws IOException {
+ Weight w = searcher.createWeight(searcher.rewrite(q), false, 1);
+ for (int i = 0; i < expected.length; i++) {
+ LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
+ int doc = i - ctx.docBase;
+ Matches matches = w.matches(ctx, doc);
+ if (matches == null) {
+ assertEquals(expected[i].length, 0);
+ continue;
+ }
+ MatchesIterator it = matches.getMatches(field);
+ if (it == null) {
+ assertEquals(expected[i].length, 0);
+ continue;
+ }
+ int pos = 0;
+ while (it.next()) {
+ assertEquals(expected[i][pos], it.term().utf8ToString());
+ pos += 1;
+ }
+ assertEquals(expected[i].length, pos);
+ }
+ }
+
+ public void testTermQuery() throws IOException {
+ Query q = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1"));
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2 },
+ { 1, 0, 0, 0, 2 },
+ { 2, 0, 0, 0, 2 },
+ { 3, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 4 }
+ });
+ }
+
+ public void testTermQueryNoStoredOffsets() throws IOException {
+ Query q = new TermQuery(new Term(FIELD_NO_OFFSETS, "w1"));
+ checkMatches(q, FIELD_NO_OFFSETS, new int[][]{
+ { 0, 0, 0, -1, -1 },
+ { 1, 0, 0, -1, -1 },
+ { 2, 0, 0, -1, -1 },
+ { 3, 0, 0, -1, -1, 2, 2, -1, -1 },
+ { 4 }
+ });
+ checkTerms(q, FIELD_NO_OFFSETS, new String[][]{
+ { "w1" },
+ { "w1" },
+ { "w1" },
+ { "w1", "w1" },
+ {}
+ });
+ }
+
+ public void testTermQueryNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_DOCS_ONLY, FIELD_FREQS }) {
+ Query q = new TermQuery(new Term(field, "w1"));
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
+ }
+ }
+
+ public void testDisjunction() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11 },
+ { 2, 0, 0, 0, 2 },
+ { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
+ { 4 }
+ });
+ checkTerms(q, FIELD_WITH_OFFSETS, new String[][]{
+ { "w1", "w3" },
+ { "w1", "w3", "w3" },
+ { "w1" },
+ { "w1", "w1", "w3" },
+ {}
+ });
+ }
+
+ public void testDisjunctionNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_DOCS_ONLY, FIELD_FREQS }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
+ }
+ }
+
+ public void testReqOpt() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11 },
+ { 2 },
+ { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
+ { 4 }
+ });
+ }
+
+ public void testReqOptNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_DOCS_ONLY, FIELD_FREQS }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, false, true, false });
+ }
+ }
+
+ public void testMinShouldMatch() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
+ .add(new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "xx")), BooleanClause.Occur.SHOULD)
+ .setMinimumNumberShouldMatch(2)
+ .build(), BooleanClause.Occur.SHOULD)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11 },
+ { 1, 1, 1, 3, 5, 3, 3, 9, 11 },
+ { 2, 0, 0, 0, 2, 1, 1, 3, 5, 4, 4, 12, 14 },
+ { 3, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11, 5, 5, 15, 17 },
+ { 4 }
+ });
+ checkTerms(q, FIELD_WITH_OFFSETS, new String[][]{
+ { "w1", "w3", "w4" },
+ { "w3", "w3" },
+ { "w1", "xx", "w4" },
+ { "w1", "w1", "w4", "w3" },
+ {}
+ });
+ }
+
+ public void testMinShouldMatchNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
+ .add(new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "w4")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "xx")), BooleanClause.Occur.SHOULD)
+ .setMinimumNumberShouldMatch(2)
+ .build(), BooleanClause.Occur.SHOULD)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
+ }
+ }
+
+ public void testExclusion() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "zz")), BooleanClause.Occur.MUST_NOT)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 2, 2, 6, 8 },
+ { 1 },
+ { 2 },
+ { 3, 5, 5, 15, 17 },
+ { 4 }
+ });
+ }
+
+ public void testExclusionNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(field, "zz")), BooleanClause.Occur.MUST_NOT)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, false, false, true, false });
+ }
+ }
+
+ public void testConjunction() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")), BooleanClause.Occur.MUST)
+ .build();
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 2, 2, 6, 8, 3, 3, 9, 11 },
+ { 1 },
+ { 2 },
+ { 3, 3, 3, 9, 11, 5, 5, 15, 17 },
+ { 4 }
+ });
+ }
+
+ public void testConjunctionNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST)
+ .add(new TermQuery(new Term(field, "w4")), BooleanClause.Occur.MUST)
+ .build();
+ checkNoPositionsMatches(q, field, new boolean[]{ true, false, false, true, false });
+ }
+ }
+
+ public void testWildcards() throws IOException {
+ Query q = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "x"));
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0 },
+ { 1 },
+ { 2, 1, 1, 3, 5 },
+ { 3 },
+ { 4 }
+ });
+ checkTerms(q, FIELD_WITH_OFFSETS, new String[][]{
+ {}, {}, { "xx" }, {}
+ });
+
+ Query rq = new RegexpQuery(new Term(FIELD_WITH_OFFSETS, "w[1-2]"));
+ checkMatches(rq, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 1, 1, 3, 5 },
+ { 1, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 2, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
+ { 4 }
+ });
+
+ }
+
+ public void testNoMatchWildcards() throws IOException {
+ Query nomatch = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "wibble"));
+ Matches matches = searcher.createWeight(searcher.rewrite(nomatch), false, 1)
+ .matches(searcher.leafContexts.get(0), 0);
+ assertNull(matches);
+ }
+
+ public void testWildcardsNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new PrefixQuery(new Term(field, "x"));
+ checkNoPositionsMatches(q, field, new boolean[]{ false, false, true, false, false });
+ }
+ }
+
+ public void testSynonymQuery() throws IOException {
+ Query q = new SynonymQuery(new Term(FIELD_WITH_OFFSETS, "w1"), new Term(FIELD_WITH_OFFSETS, "w2"));
+ checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+ { 0, 0, 0, 0, 2, 1, 1, 3, 5 },
+ { 1, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 2, 0, 0, 0, 2, 2, 2, 6, 8 },
+ { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
+ { 4 }
+ });
+ }
+
+ public void testSynonymQueryNoPositions() throws IOException {
+ for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
+ Query q = new SynonymQuery(new Term(field, "w1"), new Term(field, "w2"));
+ checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
+ }
+ }
+
+ public void testMultipleFields() throws IOException {
+ Query q = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("id", "1")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
+ .build();
+ Weight w = searcher.createWeight(searcher.rewrite(q), false, 1);
+
+ LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(1, searcher.leafContexts));
+ Matches m = w.matches(ctx, 1 - ctx.docBase);
+ assertNotNull(m);
+ checkFieldMatches(m.getMatches("id"), new int[]{ -1, 0, 0, -1, -1 });
+ checkFieldMatches(m.getMatches(FIELD_WITH_OFFSETS), new int[]{ -1, 1, 1, 3, 5, 3, 3, 9, 11 });
+ assertNull(m.getMatches("bogus"));
+
+ Set<String> fields = new HashSet<>();
+ for (String field : m) {
+ fields.add(field);
+ }
+ assertEquals(2, fields.size());
+ assertTrue(fields.contains(FIELD_WITH_OFFSETS));
+ assertTrue(fields.contains("id"));
+ }
+
+ protected String[] doc1Fields = {
+ "w1 w2 w3 w4 w5",
+ "w1 w3 w2 w3 zz",
+ "w1 xx w2 yy w4",
+ "w1 w2 w1 w4 w2 w3"
+ };
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index c271ef8..1341f58 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -28,6 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
@@ -151,6 +152,28 @@ public class ToParentBlockJoinQuery extends Query {
}
return Explanation.noMatch("Not a match");
}
+
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ // The default implementation would delegate to the joinQuery's Weight, which
+ // matches on children. We need to match on the parent instead
+ Scorer scorer = scorer(context);
+ if (scorer == null) {
+ return null;
+ }
+ final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
+ if (twoPhase == null) {
+ if (scorer.iterator().advance(doc) != doc) {
+ return null;
+ }
+ }
+ else {
+ if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) {
+ return null;
+ }
+ }
+ return Matches.MATCH_WITH_NO_TERMS;
+ }
}
private static class ParentApproximation extends DocIdSetIterator {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
----------------------------------------------------------------------
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
index 174fded..e6c01c0 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
@@ -29,6 +29,7 @@ import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterScorer;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
@@ -148,6 +149,11 @@ public final class FunctionScoreQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ return inner.matches(context, doc);
+ }
+
+ @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = inner.scorer(context);
if (scorer.iterator().advance(doc) != doc)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java
index 807e1c9..ad76b1c 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java
@@ -137,6 +137,28 @@ public final class CoveringQuery extends Query {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ LongValues minMatchValues = minimumNumberMatch.getValues(context, null);
+ if (minMatchValues.advanceExact(doc) == false) {
+ return null;
+ }
+ final long minimumNumberMatch = Math.max(1, minMatchValues.longValue());
+ long matchCount = 0;
+ List<Matches> subMatches = new ArrayList<>();
+ for (Weight weight : weights) {
+ Matches matches = weight.matches(context, doc);
+ if (matches != null) {
+ matchCount++;
+ subMatches.add(matches);
+ }
+ }
+ if (matchCount < minimumNumberMatch) {
+ return null;
+ }
+ return Matches.fromSubMatches(subMatches);
+ }
+
+ @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
LongValues minMatchValues = minimumNumberMatch.getValues(context, null);
if (minMatchValues.advanceExact(doc) == false) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java
new file mode 100644
index 0000000..c5c6e98
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatches.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+class AssertingMatches implements Matches {
+
+ private final Matches in;
+
+ AssertingMatches(Matches matches) {
+ this.in = matches;
+ }
+
+ @Override
+ public MatchesIterator getMatches(String field) throws IOException {
+ MatchesIterator mi = in.getMatches(field);
+ if (mi == null)
+ return null;
+ return new AssertingMatchesIterator(mi);
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return in.iterator();
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
new file mode 100644
index 0000000..52fb184
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingMatchesIterator.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.util.BytesRef;
+
+class AssertingMatchesIterator implements MatchesIterator {
+
+ private final MatchesIterator in;
+ private State state = State.UNPOSITIONED;
+
+ private enum State { UNPOSITIONED, ITERATING, EXHAUSTED }
+
+ AssertingMatchesIterator(MatchesIterator in) {
+ this.in = in;
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ assert state != State.EXHAUSTED : state;
+ boolean more = in.next();
+ if (more == false) {
+ state = State.EXHAUSTED;
+ }
+ else {
+ state = State.ITERATING;
+ }
+ return more;
+ }
+
+ @Override
+ public int startPosition() {
+ assert state == State.ITERATING : state;
+ return in.startPosition();
+ }
+
+ @Override
+ public int endPosition() {
+ assert state == State.ITERATING : state;
+ return in.endPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ assert state == State.ITERATING : state;
+ return in.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ assert state == State.ITERATING : state;
+ return in.endOffset();
+ }
+
+ @Override
+ public BytesRef term() {
+ assert state == State.ITERATING : state;
+ return in.term();
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/502fd4bf/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
index b98e6a1..c4ef409 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
@@ -32,6 +32,14 @@ class AssertingWeight extends FilterWeight {
}
@Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ Matches matches = in.matches(context, doc);
+ if (matches == null)
+ return null;
+ return new AssertingMatches(matches);
+ }
+
+ @Override
public Scorer scorer(LeafReaderContext context) throws IOException {
if (random.nextBoolean()) {
final Scorer inScorer = in.scorer(context);