You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/11/14 18:02:32 UTC
svn commit: r1201787 - in /lucene/dev/trunk/lucene: ./
contrib/highlighter/src/java/org/apache/lucene/search/highlight/
src/java/org/apache/lucene/search/
src/java/org/apache/lucene/search/payloads/
src/java/org/apache/lucene/search/spans/ src/test/org...
Author: rmuir
Date: Mon Nov 14 17:02:30 2011
New Revision: 1201787
URL: http://svn.apache.org/viewvc?rev=1201787&view=rev
Log:
LUCENE-3533: nuke spanfilters
Removed:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SpanFilter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SpanFilterResult.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Nov 14 17:02:30 2011
@@ -199,6 +199,9 @@ Changes in backwards compatibility polic
as these are no longer used by the scoring system. See MIGRATE.txt for more
details. (Robert Muir)
+* LUCENE-3533: Removed SpanFilters; they created large lists of objects and
+ did not scale. (Robert Muir)
+
Changes in Runtime Behavior
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Nov 14 17:02:30 2011
@@ -25,6 +25,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeSet;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -42,6 +43,7 @@ import org.apache.lucene.search.spans.Sp
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
/**
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
@@ -247,16 +249,21 @@ public class WeightedSpanTermExtractor {
List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
for (final String field : fieldNames) {
-
- AtomicReaderContext context = getLeafContextForField(field);
- Bits acceptDocs = context.reader.getLiveDocs();
- final Spans spans;
+ final SpanQuery q;
if (mustRewriteQuery) {
- spans = queries.get(field).getSpans(context, acceptDocs);
+ q = queries.get(field);
} else {
- spans = spanQuery.getSpans(context, acceptDocs);
+ q = spanQuery;
}
-
+ AtomicReaderContext context = getLeafContextForField(field);
+ Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
+ TreeSet<Term> extractedTerms = new TreeSet<Term>();
+ q.extractTerms(extractedTerms);
+ for (Term term : extractedTerms) {
+ termContexts.put(term, TermContext.build(context, term, true));
+ }
+ Bits acceptDocs = context.reader.getLiveDocs();
+ final Spans spans = q.getSpans(context, acceptDocs, termContexts);
// collect span positions
while (spans.next()) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java Mon Nov 14 17:02:30 2011
@@ -150,7 +150,7 @@ public class PayloadNearQuery extends Sp
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
- return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs), this,
+ return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
similarity, similarity.sloppyDocScorer(stats, query.getField(), context));
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java Mon Nov 14 17:02:30 2011
@@ -20,8 +20,11 @@ package org.apache.lucene.search.payload
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
+import java.util.TreeSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
@@ -41,6 +44,7 @@ import org.apache.lucene.search.spans.Sp
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.TermContext;
/**
* Experimental class to get set of payloads for most standard Lucene queries.
@@ -174,9 +178,15 @@ public class PayloadSpanUtil {
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
throws IOException {
+ Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
+ TreeSet<Term> terms = new TreeSet<Term>();
+ query.extractTerms(terms);
+ for (Term term : terms) {
+ termContexts.put(term, TermContext.build(context, term, true));
+ }
final AtomicReaderContext[] leaves = ReaderUtil.leaves(context);
for (AtomicReaderContext atomicReaderContext : leaves) {
- final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs());
+ final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs(), termContexts);
while (spans.next() == true) {
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java Mon Nov 14 17:02:30 2011
@@ -81,7 +81,7 @@ public class PayloadTermQuery extends Sp
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
- return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs),
+ return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
this, similarity.sloppyDocScorer(stats, query.getField(), context));
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java Mon Nov 14 17:02:30 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
+import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
@@ -27,6 +28,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils;
/**
@@ -93,8 +95,8 @@ public class FieldMaskingSpanQuery exten
// ...this is done to be more consistent with things like SpanFirstQuery
@Override
- public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
- return maskedQuery.getSpans(context, acceptDocs);
+ public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+ return maskedQuery.getSpans(context, acceptDocs, termContexts);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java Mon Nov 14 17:02:30 2011
@@ -17,9 +17,11 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
import java.io.IOException;
import java.util.ArrayList;
@@ -28,6 +30,7 @@ import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
+import java.util.Map;
import java.util.Set;
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
@@ -78,11 +81,11 @@ public class NearSpansOrdered extends Sp
private SpanNearQuery query;
private boolean collectPayloads = true;
- public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs) throws IOException {
- this(spanNearQuery, context, acceptDocs, true);
+ public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+ this(spanNearQuery, context, acceptDocs, termContexts, true);
}
- public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, boolean collectPayloads)
+ public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, boolean collectPayloads)
throws IOException {
if (spanNearQuery.getClauses().length < 2) {
throw new IllegalArgumentException("Less than 2 clauses: "
@@ -95,7 +98,7 @@ public class NearSpansOrdered extends Sp
matchPayload = new LinkedList<byte[]>();
subSpansByDoc = new Spans[clauses.length];
for (int i = 0; i < clauses.length; i++) {
- subSpans[i] = clauses[i].getSpans(context, acceptDocs);
+ subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
}
query = spanNearQuery; // kept for toString() only.
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java Mon Nov 14 17:02:30 2011
@@ -17,14 +17,17 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.TermContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.HashSet;
@@ -132,7 +135,7 @@ public class NearSpansUnordered extends
}
- public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs)
+ public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts)
throws IOException {
this.query = query;
this.slop = query.getSlop();
@@ -142,7 +145,7 @@ public class NearSpansUnordered extends
subSpans = new Spans[clauses.length];
for (int i = 0; i < clauses.length; i++) {
SpansCell cell =
- new SpansCell(clauses[i].getSpans(context, acceptDocs), i);
+ new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i);
ordered.add(cell);
subSpans[i] = cell.spans;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java Mon Nov 14 17:02:30 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
+import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
@@ -90,7 +91,7 @@ public class SpanMultiTermQueryWrapper<Q
}
@Override
- public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
throw new UnsupportedOperationException("Query should have been rewritten");
}
@@ -157,6 +158,9 @@ public class SpanMultiTermQueryWrapper<Q
@Override
protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, TermContext states) {
+ // TODO: would be nice to not lose term-state here.
+ // we could add a hack option to SpanOrQuery, but the hack would only work if this is the top-level Span
+ // (if you put this thing in another span query, it would extractTerms/double-seek anyway)
final SpanTermQuery q = new SpanTermQuery(term);
q.setBoost(boost);
topLevel.addClause(q);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java Mon Nov 14 17:02:30 2011
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
+import java.util.Map;
import java.util.Set;
@@ -31,6 +32,7 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils;
/** Matches spans which are near one another. One can specify <i>slop</i>, the
@@ -118,16 +120,16 @@ public class SpanNearQuery extends SpanQ
}
@Override
- public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
if (clauses.size() == 0) // optimize 0-clause case
- return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs);
+ return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts);
if (clauses.size() == 1) // optimize 1-clause case
- return clauses.get(0).getSpans(context, acceptDocs);
+ return clauses.get(0).getSpans(context, acceptDocs, termContexts);
return inOrder
- ? (Spans) new NearSpansOrdered(this, context, acceptDocs, collectPayloads)
- : (Spans) new NearSpansUnordered(this, context, acceptDocs);
+ ? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads)
+ : (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java Mon Nov 14 17:02:30 2011
@@ -22,11 +22,13 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Map;
import java.util.Set;
/** Removes matches which overlap with another SpanQuery. */
@@ -76,12 +78,12 @@ public class SpanNotQuery extends SpanQu
}
@Override
- public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
+ public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
return new Spans() {
- private Spans includeSpans = include.getSpans(context, acceptDocs);
+ private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
private boolean moreInclude = true;
- private Spans excludeSpans = exclude.getSpans(context, acceptDocs);
+ private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
private boolean moreExclude = excludeSpans.next();
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java Mon Nov 14 17:02:30 2011
@@ -23,6 +23,7 @@ import java.util.List;
import java.util.Collection;
import java.util.ArrayList;
import java.util.Iterator;
+import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
@@ -30,6 +31,7 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.Query;
@@ -164,9 +166,9 @@ public class SpanOrQuery extends SpanQue
}
@Override
- public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
+ public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
if (clauses.size() == 1) // optimize 1-clause case
- return (clauses.get(0)).getSpans(context, acceptDocs);
+ return (clauses.get(0)).getSpans(context, acceptDocs, termContexts);
return new Spans() {
private SpanQueue queue = null;
@@ -175,7 +177,7 @@ public class SpanOrQuery extends SpanQue
queue = new SpanQueue(clauses.size());
Iterator<SpanQuery> i = clauses.iterator();
while (i.hasNext()) {
- Spans spans = i.next().getSpans(context, acceptDocs);
+ Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
if ( ((target == -1) && spans.next())
|| ((target != -1) && spans.skipTo(target))) {
queue.add(spans);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java Mon Nov 14 17:02:30 2011
@@ -22,10 +22,12 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Map;
import java.util.Set;
@@ -82,8 +84,8 @@ public abstract class SpanPositionCheckQ
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
@Override
- public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
- return new PositionCheckSpan(context, acceptDocs);
+ public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+ return new PositionCheckSpan(context, acceptDocs, termContexts);
}
@@ -107,8 +109,8 @@ public abstract class SpanPositionCheckQ
protected class PositionCheckSpan extends Spans {
private Spans spans;
- public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs) throws IOException {
- spans = match.getSpans(context, acceptDocs);
+ public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+ spans = match.getSpans(context, acceptDocs, termContexts);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java Mon Nov 14 17:02:30 2011
@@ -18,18 +18,21 @@ package org.apache.lucene.search.spans;
*/
import java.io.IOException;
+import java.util.Map;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
/** Base class for span-based queries. */
public abstract class SpanQuery extends Query {
/** Expert: Returns the matches for this query in an index. Used internally
* to search for spans. */
- public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException;
+ public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException;
/** Returns the name of the field matched by this query.*/
public abstract String getField();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java Mon Nov 14 17:02:30 2011
@@ -19,12 +19,19 @@ package org.apache.lucene.search.spans;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
+import java.util.Map;
import java.util.Set;
/** Matches spans containing a term. */
@@ -82,22 +89,46 @@ public class SpanTermQuery extends SpanQ
}
@Override
- public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
- final IndexReader reader = context.reader;
- final DocsAndPositionsEnum postings = reader.termPositionsEnum(acceptDocs,
- term.field(),
- term.bytes());
+ public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+ TermContext termContext = termContexts.get(term);
+ final TermState state;
+ if (termContext == null) {
+ // this happens with span-not query, as it doesn't include the NOT side in extractTerms()
+ // so we seek to the term now in this segment... this sucks, mostly because it's ugly!
+ final Fields fields = context.reader.fields();
+ if (fields != null) {
+ final Terms terms = fields.terms(term.field());
+ if (terms != null) {
+ final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
+ if (termsEnum.seekExact(term.bytes(), true)) {
+ state = termsEnum.termState();
+ } else {
+ state = null;
+ }
+ } else {
+ state = null;
+ }
+ } else {
+ state = null;
+ }
+ } else {
+ state = termContext.get(context.ord);
+ }
+
+ if (state == null) { // term is not present in that reader
+ return TermSpans.EMPTY_TERM_SPANS;
+ }
+
+ final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
+ termsEnum.seekExact(term.bytes(), state);
+
+ final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);
if (postings != null) {
return new TermSpans(postings, term);
} else {
- if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) {
- // term does exist, but has no positions
- throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
- } else {
- // term does not exist
- return TermSpans.EMPTY_TERM_SPANS;
- }
+ // term does exist, but has no positions
+ throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
}
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java Mon Nov 14 17:02:30 2011
@@ -27,7 +27,8 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;
import java.io.IOException;
-import java.util.Set;
+import java.util.HashMap;
+import java.util.Map;
import java.util.TreeSet;
/**
@@ -35,7 +36,7 @@ import java.util.TreeSet;
*/
public class SpanWeight extends Weight {
protected Similarity similarity;
- protected Set<Term> terms;
+ protected Map<Term,TermContext> termContexts;
protected SpanQuery query;
protected Similarity.Stats stats;
@@ -44,15 +45,16 @@ public class SpanWeight extends Weight {
this.similarity = searcher.getSimilarityProvider().get(query.getField());
this.query = query;
- terms=new TreeSet<Term>();
+ termContexts = new HashMap<Term,TermContext>();
+ TreeSet<Term> terms = new TreeSet<Term>();
query.extractTerms(terms);
final ReaderContext context = searcher.getTopReaderContext();
- final TermContext states[] = new TermContext[terms.size()];
final TermStatistics termStats[] = new TermStatistics[terms.size()];
int i = 0;
for (Term term : terms) {
- states[i] = TermContext.build(context, term, true);
- termStats[i] = searcher.termStatistics(term, states[i]);
+ TermContext state = TermContext.build(context, term, true);
+ termStats[i] = searcher.termStatistics(term, state);
+ termContexts.put(term, state);
i++;
}
stats = similarity.computeStats(
@@ -77,7 +79,7 @@ public class SpanWeight extends Weight {
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
- return new SpanScorer(query.getSpans(context, acceptDocs), this, similarity.sloppyDocScorer(stats, query.getField(), context));
+ return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context));
}
@Override
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java Mon Nov 14 17:02:30 2011
@@ -281,19 +281,6 @@ final class JustCompileSearch {
}
}
- static final class JustCompileSpanFilter extends SpanFilter {
-
- @Override
- public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
- @Override
- public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
- return null;
- }
- }
-
static final class JustCompileTopDocsCollector extends TopDocsCollector<ScoreDoc> {
protected JustCompileTopDocsCollector(PriorityQueue<ScoreDoc> pq) {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java Mon Nov 14 17:02:30 2011
@@ -19,11 +19,14 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Collection;
+import java.util.Map;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
/**
* Holds all implementations of classes in the o.a.l.s.spans package as a
@@ -83,7 +86,7 @@ final class JustCompileSearchSpans {
}
@Override
- public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java?rev=1201787&r1=1201786&r2=1201787&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java Mon Nov 14 17:02:30 2011
@@ -20,11 +20,16 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeSet;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.TermContext;
/**
*
@@ -39,19 +44,27 @@ public class MultiSpansWrapper extends S
private AtomicReaderContext[] leaves;
private int leafOrd = 0;
private Spans current;
+ private Map<Term,TermContext> termContexts;
- private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query) {
+ private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query, Map<Term,TermContext> termContexts) {
this.query = query;
this.leaves = leaves;
+ this.termContexts = termContexts;
}
public static Spans wrap(ReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
+ Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
+ TreeSet<Term> terms = new TreeSet<Term>();
+ query.extractTerms(terms);
+ for (Term term : terms) {
+ termContexts.put(term, TermContext.build(topLevelReaderContext, term, true));
+ }
AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelReaderContext);
if(leaves.length == 1) {
- return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs());
+ return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs(), termContexts);
}
- return new MultiSpansWrapper(leaves, query);
+ return new MultiSpansWrapper(leaves, query, termContexts);
}
@Override
@@ -60,14 +73,14 @@ public class MultiSpansWrapper extends S
return false;
}
if (current == null) {
- current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
+ current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
}
while(true) {
if (current.next()) {
return true;
}
if (++leafOrd < leaves.length) {
- current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
+ current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
} else {
current = null;
break;
@@ -85,17 +98,17 @@ public class MultiSpansWrapper extends S
int subIndex = ReaderUtil.subIndex(target, leaves);
assert subIndex >= leafOrd;
if (subIndex != leafOrd) {
- current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs());
+ current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs(), termContexts);
leafOrd = subIndex;
} else if (current == null) {
- current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
+ current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
}
while (true) {
if (current.skipTo(target - leaves[leafOrd].docBase)) {
return true;
}
if (++leafOrd < leaves.length) {
- current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
+ current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
} else {
current = null;
break;