You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2015/04/29 13:42:12 UTC
svn commit: r1676716 - in /lucene/dev/trunk/lucene: ./
core/src/java/org/apache/lucene/search/spans/
core/src/test/org/apache/lucene/search/spans/
test-framework/src/java/org/apache/lucene/search/spans/
Author: rmuir
Date: Wed Apr 29 11:42:11 2015
New Revision: 1676716
URL: http://svn.apache.org/r1676716
Log:
LUCENE-6083: SpanWithinQuery and SpanContainingQuery
Added:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java (with props)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java (with props)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java (with props)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java (with props)
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java (with props)
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/spans/SpanTestUtil.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1676716&r1=1676715&r2=1676716&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Apr 29 11:42:11 2015
@@ -85,6 +85,9 @@ New Features
* LUCENE-6451: Expressions now support bindings keys that look like
zero arg functions (Jack Conradson via Ryan Ernst)
+* LUCENE-6083: Add SpanWithinQuery and SpanContainingQuery that return
+ spans inside of / containing another spans. (Paul Elschot via Robert Muir)
+
Optimizations
* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java?rev=1676716&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java Wed Apr 29 11:42:11 2015
@@ -0,0 +1,102 @@
+package org.apache.lucene.search.spans;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.ConjunctionDISI;
+import org.apache.lucene.search.TwoPhaseIterator;
+
+/**
+ * Common super class for multiple sub spans required in a document.
+ */
+abstract class ConjunctionSpans extends Spans {
+ final Spans[] subSpans; // in query order
+ final DocIdSetIterator conjunction; // use to move to next doc with all clauses
+ boolean atFirstInCurrentDoc; // a first start position is available in current doc for nextStartPosition
+ boolean oneExhaustedInCurrentDoc; // one of the subSpans is exhausted in the current doc
+
+ ConjunctionSpans(List<Spans> subSpans) {
+ if (subSpans.size() < 2) {
+ throw new IllegalArgumentException("Less than 2 subSpans.size():" + subSpans.size());
+ }
+ this.subSpans = subSpans.toArray(new Spans[subSpans.size()]);
+ this.conjunction = ConjunctionDISI.intersect(subSpans);
+ this.atFirstInCurrentDoc = true; // ensure for doc -1 that start/end positions are -1
+ }
+
+ @Override
+ public int docID() {
+ return conjunction.docID();
+ }
+
+ @Override
+ public long cost() {
+ return conjunction.cost();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return (conjunction.nextDoc() == NO_MORE_DOCS)
+ ? NO_MORE_DOCS
+ : toMatchDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return (conjunction.advance(target) == NO_MORE_DOCS)
+ ? NO_MORE_DOCS
+ : toMatchDoc();
+ }
+
+ int toMatchDoc() throws IOException {
+ oneExhaustedInCurrentDoc = false;
+ while (true) {
+ if (twoPhaseCurrentDocMatches()) {
+ return docID();
+ }
+ if (conjunction.nextDoc() == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ }
+ }
+
+
+ abstract boolean twoPhaseCurrentDocMatches() throws IOException;
+
+ /**
+ * Return a {@link TwoPhaseIterator} view of this ConjunctionSpans.
+ */
+ @Override
+ public TwoPhaseIterator asTwoPhaseIterator() {
+ TwoPhaseIterator res = new TwoPhaseIterator(conjunction) {
+
+ @Override
+ public boolean matches() throws IOException {
+ return twoPhaseCurrentDocMatches();
+ }
+ };
+ return res;
+ }
+
+ public Spans[] getSubSpans() {
+ return subSpans;
+ }
+}
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java?rev=1676716&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java Wed Apr 29 11:42:11 2015
@@ -0,0 +1,60 @@
+package org.apache.lucene.search.spans;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Objects;
+
+abstract class ContainSpans extends ConjunctionSpans {
+ Spans sourceSpans;
+ Spans bigSpans;
+ Spans littleSpans;
+
+ ContainSpans(Spans bigSpans, Spans littleSpans, Spans sourceSpans) {
+ super(Arrays.asList(bigSpans, littleSpans));
+ this.bigSpans = Objects.requireNonNull(bigSpans);
+ this.littleSpans = Objects.requireNonNull(littleSpans);
+ this.sourceSpans = Objects.requireNonNull(sourceSpans);
+ }
+
+ @Override
+ public int startPosition() {
+ return atFirstInCurrentDoc ? -1
+ : oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS
+ : sourceSpans.startPosition();
+ }
+
+ @Override
+ public int endPosition() {
+ return atFirstInCurrentDoc ? -1
+ : oneExhaustedInCurrentDoc ? NO_MORE_POSITIONS
+ : sourceSpans.endPosition();
+ }
+
+ @Override
+ public boolean isPayloadAvailable() throws IOException {
+ return sourceSpans.isPayloadAvailable();
+ }
+
+ @Override
+ public Collection<byte[]> getPayload() throws IOException {
+ return sourceSpans.getPayload();
+ }
+}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java?rev=1676716&r1=1676715&r2=1676716&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpans.java Wed Apr 29 11:42:11 2015
@@ -17,82 +17,18 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.ConjunctionDISI;
-import org.apache.lucene.search.TwoPhaseIterator;
-
-import java.io.IOException;
import java.util.List;
-import java.util.Objects;
/**
- * Common super class for un/ordered Spans
+ * Common super class for un/ordered Spans with a maximum slop between them.
*/
-abstract class NearSpans extends Spans {
+abstract class NearSpans extends ConjunctionSpans {
final SpanNearQuery query;
final int allowedSlop;
- final Spans[] subSpans; // in query order
- final DocIdSetIterator conjunction; // use to move to next doc with all clauses
- boolean atFirstInCurrentDoc;
- boolean oneExhaustedInCurrentDoc; // no more results possbile in current doc
-
- NearSpans(SpanNearQuery query, List<Spans> subSpans)
- throws IOException {
- this.query = Objects.requireNonNull(query);
+ NearSpans(SpanNearQuery query, List<Spans> subSpans) {
+ super(subSpans);
+ this.query = query;
this.allowedSlop = query.getSlop();
- if (subSpans.size() < 2) {
- throw new IllegalArgumentException("Less than 2 subSpans: " + query);
- }
- this.subSpans = subSpans.toArray(new Spans[subSpans.size()]); // in query order
- this.conjunction = ConjunctionDISI.intersect(subSpans);
- }
-
- @Override
- public int docID() {
- return conjunction.docID();
- }
-
- @Override
- public long cost() {
- return conjunction.cost();
- }
-
- @Override
- public int nextDoc() throws IOException {
- return (conjunction.nextDoc() == NO_MORE_DOCS)
- ? NO_MORE_DOCS
- : toMatchDoc();
- }
-
- @Override
- public int advance(int target) throws IOException {
- return (conjunction.advance(target) == NO_MORE_DOCS)
- ? NO_MORE_DOCS
- : toMatchDoc();
}
-
- abstract int toMatchDoc() throws IOException;
-
- abstract boolean twoPhaseCurrentDocMatches() throws IOException;
-
- /**
- * Return a {@link TwoPhaseIterator} view of this {@link NearSpans}.
- */
- @Override
- public TwoPhaseIterator asTwoPhaseIterator() {
- TwoPhaseIterator res = new TwoPhaseIterator(conjunction) {
-
- @Override
- public boolean matches() throws IOException {
- return twoPhaseCurrentDocMatches();
- }
- };
- return res;
- }
-
- public Spans[] getSubSpans() {
- return subSpans;
- }
-
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java?rev=1676716&r1=1676715&r2=1676716&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java Wed Apr 29 11:42:11 2015
@@ -56,35 +56,6 @@ public class NearSpansOrdered extends Ne
this.atFirstInCurrentDoc = true; // -1 startPosition/endPosition also at doc -1
}
- /** Advances the subSpans to just after an ordered match with a minimum slop
- * that is smaller than the slop allowed by the SpanNearQuery.
- * @return true iff there is such a match.
- */
- @Override
- int toMatchDoc() throws IOException {
- subSpansToFirstStartPosition();
- while (true) {
- if (! stretchToOrder()) {
- if (conjunction.nextDoc() == NO_MORE_DOCS) {
- return NO_MORE_DOCS;
- }
- subSpansToFirstStartPosition();
- } else {
- if (shrinkToAfterShortestMatch()) {
- atFirstInCurrentDoc = true;
- return conjunction.docID();
- }
- // not a match, after shortest ordered spans, not at beginning of doc.
- if (oneExhaustedInCurrentDoc) {
- if (conjunction.nextDoc() == NO_MORE_DOCS) {
- return NO_MORE_DOCS;
- }
- subSpansToFirstStartPosition();
- }
- }
- }
- }
-
@Override
boolean twoPhaseCurrentDocMatches() throws IOException {
subSpansToFirstStartPosition();
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java?rev=1676716&r1=1676715&r2=1676716&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java Wed Apr 29 11:42:11 2015
@@ -192,30 +192,6 @@ public class NearSpansUnordered extends
}
@Override
- int toMatchDoc() throws IOException {
- // at doc with all subSpans
- subSpanCellsToPositionQueue();
- while (true) {
- if (atMatch()) {
- atFirstInCurrentDoc = true;
- oneExhaustedInCurrentDoc = false;
- return conjunction.docID();
- }
- assert minPositionCell().startPosition() != NO_MORE_POSITIONS;
- if (minPositionCell().nextStartPosition() != NO_MORE_POSITIONS) {
- spanPositionQueue.updateTop();
- }
- else { // exhausted a subSpan in current doc
- if (conjunction.nextDoc() == NO_MORE_DOCS) {
- return NO_MORE_DOCS;
- }
- // at doc with all subSpans
- subSpanCellsToPositionQueue();
- }
- }
- }
-
- @Override
boolean twoPhaseCurrentDocMatches() throws IOException {
// at doc with all subSpans
subSpanCellsToPositionQueue();
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java?rev=1676716&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainQuery.java Wed Apr 29 11:42:11 2015
@@ -0,0 +1,120 @@
+package org.apache.lucene.search.spans;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.Set;
+import java.util.Objects;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.Bits;
+
+abstract class SpanContainQuery extends SpanQuery implements Cloneable {
+ SpanQuery big;
+ SpanQuery little;
+
+ SpanContainQuery(SpanQuery big, SpanQuery little, float boost) {
+ this.big = Objects.requireNonNull(big);
+ this.little = Objects.requireNonNull(little);
+ Objects.requireNonNull(big.getField());
+ Objects.requireNonNull(little.getField());
+ if (! big.getField().equals(little.getField())) {
+ throw new IllegalArgumentException("big and little not same field");
+ }
+ this.setBoost(boost);
+ }
+
+ @Override
+ public String getField() { return big.getField(); }
+
+ /** Extract terms from both <code>big</code> and <code>little</code>. */
+ @Override
+ public void extractTerms(Set<Term> terms) {
+ big.extractTerms(terms);
+ little.extractTerms(terms);
+ }
+
+ ArrayList<Spans> prepareConjunction(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
+ Spans bigSpans = big.getSpans(context, acceptDocs, termContexts);
+ if (bigSpans == null) {
+ return null;
+ }
+ Spans littleSpans = little.getSpans(context, acceptDocs, termContexts);
+ if (littleSpans == null) {
+ return null;
+ }
+ ArrayList<Spans> bigAndLittle = new ArrayList<>();
+ bigAndLittle.add(bigSpans);
+ bigAndLittle.add(littleSpans);
+ return bigAndLittle;
+ }
+
+ String toString(String field, String name) {
+ StringBuilder buffer = new StringBuilder();
+ buffer.append(name);
+ buffer.append("(");
+ buffer.append(big.toString(field));
+ buffer.append(", ");
+ buffer.append(little.toString(field));
+ buffer.append(")");
+ return buffer.toString();
+ }
+
+ @Override
+ public abstract SpanContainQuery clone();
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ SpanContainQuery clone = null;
+ SpanQuery rewrittenBig = (SpanQuery) big.rewrite(reader);
+ if (rewrittenBig != big) {
+ clone = this.clone();
+ clone.big = rewrittenBig;
+ }
+ SpanQuery rewrittenLittle = (SpanQuery) little.rewrite(reader);
+ if (rewrittenLittle != little) {
+ if (clone == null) clone = this.clone();
+ clone.little = rewrittenLittle;
+ }
+ return (clone != null) ? clone : this;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (! super.equals(o)) {
+ return false;
+ }
+ SpanContainQuery other = (SpanContainQuery)o;
+ return big.equals(other.big) && little.equals(other.little);
+ }
+
+ @Override
+ public int hashCode() {
+ int h = Integer.rotateLeft(super.hashCode(), 1);
+ h ^= big.hashCode();
+ h = Integer.rotateLeft(h, 1);
+ h ^= little.hashCode();
+ return h;
+ }
+}
\ No newline at end of file
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java?rev=1676716&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java Wed Apr 29 11:42:11 2015
@@ -0,0 +1,110 @@
+package org.apache.lucene.search.spans;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.ArrayList;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.Bits;
+
+/** Keep matches that contain another Spans. */
+public class SpanContainingQuery extends SpanContainQuery {
+ /** Construct a SpanContainingQuery matching spans from <code>big</code>
+ * that contain at least one spans from <code>little</code>.
+ * This query has the boost of <code>big</code>.
+ * <code>big</code> and <code>little</code> must be in the same field.
+ */
+ public SpanContainingQuery(SpanQuery big, SpanQuery little) {
+ super(big, little, big.getBoost());
+ }
+
+ @Override
+ public String toString(String field) {
+ return toString(field, "SpanContaining");
+ }
+
+ @Override
+ public SpanContainingQuery clone() {
+ return new SpanContainingQuery(
+ (SpanQuery) big.clone(),
+ (SpanQuery) little.clone());
+ }
+
+ /**
+ * Return spans from <code>big</code> that contain at least one spans from <code>little</code>.
+ * The payload is from the spans of <code>big</code>.
+ */
+ @Override
+ public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
+ ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts);
+ if (containerContained == null) {
+ return null;
+ }
+
+ Spans big = containerContained.get(0);
+ Spans little = containerContained.get(1);
+
+ return new ContainSpans(big, little, big) {
+
+ @Override
+ boolean twoPhaseCurrentDocMatches() throws IOException {
+ oneExhaustedInCurrentDoc = false;
+ assert littleSpans.startPosition() == -1;
+ while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) {
+ while (littleSpans.startPosition() < bigSpans.startPosition()) {
+ if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) {
+ oneExhaustedInCurrentDoc = true;
+ return false;
+ }
+ }
+ if (bigSpans.endPosition() >= littleSpans.endPosition()) {
+ atFirstInCurrentDoc = true;
+ return true;
+ }
+ }
+ oneExhaustedInCurrentDoc = true;
+ return false;
+ }
+
+ @Override
+ public int nextStartPosition() throws IOException {
+ if (atFirstInCurrentDoc) {
+ atFirstInCurrentDoc = false;
+ return bigSpans.startPosition();
+ }
+ while (bigSpans.nextStartPosition() != NO_MORE_POSITIONS) {
+ while (littleSpans.startPosition() < bigSpans.startPosition()) {
+ if (littleSpans.nextStartPosition() == NO_MORE_POSITIONS) {
+ oneExhaustedInCurrentDoc = true;
+ return NO_MORE_POSITIONS;
+ }
+ }
+ if (bigSpans.endPosition() >= littleSpans.endPosition()) {
+ return bigSpans.startPosition();
+ }
+ }
+ oneExhaustedInCurrentDoc = true;
+ return NO_MORE_POSITIONS;
+ }
+ };
+ }
+}
\ No newline at end of file
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java?rev=1676716&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java Wed Apr 29 11:42:11 2015
@@ -0,0 +1,110 @@
+package org.apache.lucene.search.spans;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.ArrayList;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.Bits;
+
+/** Keep matches that are contained within another Spans. */
+public class SpanWithinQuery extends SpanContainQuery {
+ /** Construct a SpanWithinQuery matching spans from <code>little</code>
+ * that are inside of <code>big</code>.
+ * This query has the boost of <code>little</code>.
+ * <code>big</code> and <code>little</code> must be in the same field.
+ */
+ public SpanWithinQuery(SpanQuery big, SpanQuery little) {
+ super(big, little, little.getBoost());
+ }
+
+ @Override
+ public String toString(String field) {
+ return toString(field, "SpanWithin");
+ }
+
+ @Override
+ public SpanWithinQuery clone() {
+ return new SpanWithinQuery(
+ (SpanQuery) big.clone(),
+ (SpanQuery) little.clone());
+ }
+
+ /**
+ * Return spans from <code>little</code> that are contained in a spans from <code>big</code>.
+ * The payload is from the spans of <code>little</code>.
+ */
+ @Override
+ public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
+ ArrayList<Spans> containerContained = prepareConjunction(context, acceptDocs, termContexts);
+ if (containerContained == null) {
+ return null;
+ }
+
+ Spans big = containerContained.get(0);
+ Spans little = containerContained.get(1);
+
+ return new ContainSpans(big, little, little) {
+
+ @Override
+ boolean twoPhaseCurrentDocMatches() throws IOException {
+ oneExhaustedInCurrentDoc = false;
+ assert littleSpans.startPosition() == -1;
+ while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) {
+ while (bigSpans.endPosition() < littleSpans.endPosition()) {
+ if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) {
+ oneExhaustedInCurrentDoc = true;
+ return false;
+ }
+ }
+ if (bigSpans.startPosition() <= littleSpans.startPosition()) {
+ atFirstInCurrentDoc = true;
+ return true;
+ }
+ }
+ oneExhaustedInCurrentDoc = true;
+ return false;
+ }
+
+ @Override
+ public int nextStartPosition() throws IOException {
+ if (atFirstInCurrentDoc) {
+ atFirstInCurrentDoc = false;
+ return littleSpans.startPosition();
+ }
+ while (littleSpans.nextStartPosition() != NO_MORE_POSITIONS) {
+ while (bigSpans.endPosition() < littleSpans.endPosition()) {
+ if (bigSpans.nextStartPosition() == NO_MORE_POSITIONS) {
+ oneExhaustedInCurrentDoc = true;
+ return NO_MORE_POSITIONS;
+ }
+ }
+ if (bigSpans.startPosition() <= littleSpans.startPosition()) {
+ return littleSpans.startPosition();
+ }
+ }
+ oneExhaustedInCurrentDoc = true;
+ return NO_MORE_POSITIONS;
+ }
+ };
+ }
+}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java?rev=1676716&r1=1676715&r2=1676716&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package-info.java Wed Apr 29 11:42:11 2015
@@ -35,6 +35,12 @@
* which occur near one another, and can be used to implement things like
* phrase search (when constructed from {@link org.apache.lucene.search.spans.SpanTermQuery}s)
* and inter-phrase proximity (when constructed from other {@link org.apache.lucene.search.spans.SpanNearQuery}s).</li>
+ *
+ * <li> A {@link org.apache.lucene.search.spans.SpanWithinQuery SpanWithinQuery} matches spans
+ * which occur inside of another spans. </li>
+ *
+ * <li> A {@link org.apache.lucene.search.spans.SpanContainingQuery SpanContainingQuery} matches spans
+ * which contain another spans. </li>
*
* <li>A {@link org.apache.lucene.search.spans.SpanOrQuery SpanOrQuery} merges spans from a
* number of other {@link org.apache.lucene.search.spans.SpanQuery}s.</li>
Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java?rev=1676716&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanContainQuery.java Wed Apr 29 11:42:11 2015
@@ -0,0 +1,147 @@
+package org.apache.lucene.search.spans;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.CheckHits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+import static org.apache.lucene.search.spans.SpanTestUtil.*;
+
+public class TestSpanContainQuery extends LuceneTestCase {
+ IndexSearcher searcher;
+ IndexReader reader;
+ Directory directory;
+
+ static final String field = "field";
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ directory = newDirectory();
+ RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
+ for (int i = 0; i < docFields.length; i++) {
+ Document doc = new Document();
+ doc.add(newTextField(field, docFields[i], Field.Store.YES));
+ writer.addDocument(doc);
+ }
+ reader = writer.getReader();
+ writer.close();
+ searcher = newSearcher(reader);
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ reader.close();
+ directory.close();
+ super.tearDown();
+ }
+
+ String[] docFields = {
+ "w1 w2 w3 w4 w5",
+ "w1 w3 w2 w3",
+ "w1 xx w2 yy w3",
+ "w1 w3 xx w2 yy w3",
+ };
+
+ void checkHits(Query query, int[] results) throws Exception {
+ CheckHits.checkHits(random(), query, field, searcher, results);
+ }
+
+ Spans makeSpans(SpanQuery sq) throws Exception {
+ return MultiSpansWrapper.wrap(searcher.getIndexReader(), sq);
+ }
+
+ void tstEqualSpans(String mes, SpanQuery expectedQ, SpanQuery actualQ) throws Exception {
+ Spans expected = makeSpans(expectedQ);
+ Spans actual = makeSpans(actualQ);
+ tstEqualSpans(mes, expected, actual);
+ }
+
+ void tstEqualSpans(String mes, Spans expected, Spans actual) throws Exception {
+ while (expected.nextDoc() != Spans.NO_MORE_DOCS) {
+ assertEquals(expected.docID(), actual.nextDoc());
+ assertEquals(expected.docID(), actual.docID());
+ while (expected.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
+ assertEquals(expected.startPosition(), actual.nextStartPosition());
+ assertEquals("start", expected.startPosition(), actual.startPosition());
+ assertEquals("end", expected.endPosition(), actual.endPosition());
+ }
+ }
+ }
+
+ public void testSpanContainTerm() throws Exception {
+ SpanQuery stq = spanTermQuery(field, "w3");
+ SpanQuery containingQ = spanContainingQuery(stq, stq);
+ SpanQuery containedQ = spanWithinQuery(stq, stq);
+ tstEqualSpans("containing", stq, containingQ);
+ tstEqualSpans("containing", stq, containedQ);
+ }
+
+ public void testSpanContainPhraseBothWords() throws Exception {
+ String w2 = "w2";
+ String w3 = "w3";
+ SpanQuery phraseQ = spanNearOrderedQuery(field, 0, w2, w3);
+ SpanQuery w23 = spanOrQuery(field, w2, w3);
+ SpanQuery containingPhraseOr = spanContainingQuery(phraseQ, w23);
+ SpanQuery containedPhraseOr = spanWithinQuery(phraseQ, w23);
+ tstEqualSpans("containing phrase or", phraseQ, containingPhraseOr);
+ Spans spans = makeSpans(containedPhraseOr);
+ assertNext(spans,0,1,2);
+ assertNext(spans,0,2,3);
+ assertNext(spans,1,2,3);
+ assertNext(spans,1,3,4);
+ assertFinished(spans);
+ }
+
+ public void testSpanContainPhraseFirstWord() throws Exception {
+ String w2 = "w2";
+ String w3 = "w3";
+ SpanQuery stqw2 = spanTermQuery(field, w2);
+ SpanQuery phraseQ = spanNearOrderedQuery(field, 0, w2, w3);
+ SpanQuery containingPhraseW2 = spanContainingQuery(phraseQ, stqw2);
+ SpanQuery containedPhraseW2 = spanWithinQuery(phraseQ, stqw2);
+ tstEqualSpans("containing phrase w2", phraseQ, containingPhraseW2);
+ Spans spans = makeSpans(containedPhraseW2);
+ assertNext(spans,0,1,2);
+ assertNext(spans,1,2,3);
+ assertFinished(spans);
+ }
+
+ public void testSpanContainPhraseSecondWord() throws Exception {
+ String w2 = "w2";
+ String w3 = "w3";
+ SpanQuery stqw3 = spanTermQuery(field, w3);
+ SpanQuery phraseQ = spanNearOrderedQuery(field, 0, w2, w3);
+ SpanQuery containingPhraseW3 = spanContainingQuery(phraseQ, stqw3);
+ SpanQuery containedPhraseW3 = spanWithinQuery(phraseQ, stqw3);
+ tstEqualSpans("containing phrase w3", phraseQ, containingPhraseW3);
+ Spans spans = makeSpans(containedPhraseW3);
+ assertNext(spans,0,2,3);
+ assertNext(spans,1,3,4);
+ assertFinished(spans);
+ }
+
+}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java?rev=1676716&r1=1676715&r2=1676716&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java Wed Apr 29 11:42:11 2015
@@ -419,4 +419,37 @@ public class TestSpanSearchEquivalence e
Query q2 = nearQuery;
assertSameSet(q1, q2);
}
+
+ /** SpanWithinQuery(A, B) ⊆ SpanNearQuery(A) */
+ public void testSpanWithinVsNear() throws Exception { // builds within(near(t1, t2), t3) from random terms and checks a subset relation
+ Term t1 = randomTerm();
+ Term t2 = randomTerm();
+ SpanQuery subquery[] = new SpanQuery[] {
+ spanQuery(new SpanTermQuery(t1)),
+ spanQuery(new SpanTermQuery(t2))
+ };
+ SpanQuery nearQuery = spanQuery(new SpanNearQuery(subquery, 10, true)); // first ("big") argument of the within query below; 10 and true are presumably slop and inOrder -- confirm
+
+ Term t3 = randomTerm();
+ SpanQuery termQuery = spanQuery(new SpanTermQuery(t3)); // second ("little") argument of the within query below
+ Query q1 = spanQuery(new SpanWithinQuery(nearQuery, termQuery));
+ assertSubsetOf(q1, termQuery); // NOTE(review): compares against termQuery (B) although the javadoc names SpanNearQuery(A) -- confirm which is intended
+ }
+
+ /** SpanWithinQuery(A, B) = SpanContainingQuery(A, B) */
+ public void testSpanWithinVsContaining() throws Exception { // builds within and containing queries from the same (near, term) pair and compares them
+ Term t1 = randomTerm();
+ Term t2 = randomTerm();
+ SpanQuery subquery[] = new SpanQuery[] {
+ spanQuery(new SpanTermQuery(t1)),
+ spanQuery(new SpanTermQuery(t2))
+ };
+ SpanQuery nearQuery = spanQuery(new SpanNearQuery(subquery, 10, true)); // first ("big") argument of both queries below; 10 and true are presumably slop and inOrder -- confirm
+
+ Term t3 = randomTerm();
+ SpanQuery termQuery = spanQuery(new SpanTermQuery(t3)); // second ("little") argument of both queries below
+ Query q1 = spanQuery(new SpanWithinQuery(nearQuery, termQuery));
+ Query q2 = spanQuery(new SpanContainingQuery(nearQuery, termQuery));
+ assertSameSet(q1, q2); // presumably compares the sets of matching documents, not the individual spans -- confirm against assertSameSet
+ }
}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/spans/SpanTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/spans/SpanTestUtil.java?rev=1676716&r1=1676715&r2=1676716&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/spans/SpanTestUtil.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/spans/SpanTestUtil.java Wed Apr 29 11:42:11 2015
@@ -90,6 +90,20 @@ public class SpanTestUtil {
}
/**
+ * Makes a new SpanContainingQuery from {@code big} and {@code little}, passed through {@code spanQuery(...)} (with additional asserts).
+ */
+ public static SpanQuery spanContainingQuery(SpanQuery big, SpanQuery little) {
+ return spanQuery(new SpanContainingQuery(big, little));
+ }
+
+ /**
+ * Makes a new SpanWithinQuery from {@code big} and {@code little}, passed through {@code spanQuery(...)} (with additional asserts).
+ */
+ public static SpanQuery spanWithinQuery(SpanQuery big, SpanQuery little) {
+ return spanQuery(new SpanWithinQuery(big, little));
+ }
+
+ /**
* Makes a new ordered SpanNearQuery (with additional asserts) from the provided {@code terms}
*/
public static SpanQuery spanNearOrderedQuery(String field, int slop, String... terms) {