You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/07/14 18:58:13 UTC
svn commit: r1361560 - in /lucene/dev/branches/LUCENE-2878/lucene:
core/src/java/org/apache/lucene/search/
core/src/java/org/apache/lucene/search/positions/
core/src/test/org/apache/lucene/search/
core/src/test/org/apache/lucene/search/positions/ highlighter/src/test/org/apache/lucene/search/poshighlight/
Author: simonw
Date: Sat Jul 14 16:58:12 2012
New Revision: 1361560
URL: http://svn.apache.org/viewvc?rev=1361560&view=rev
Log:
LUCENE-2878: Add sloppy phrase PositionIterator
Added:
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/MaxLengthPositionIntervalIterator.java
Modified:
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/ConjunctionPositionIterator.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/DisjunctionPositionIterator.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueue.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueueAnd.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java
lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/positions/TestPositionOffsets.java
lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java Sat Jul 14 16:58:12 2012
@@ -17,14 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.*;
-
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
import org.apache.lucene.search.TermQuery.TermDocsEnumFactory;
@@ -34,6 +27,12 @@ import org.apache.lucene.search.similari
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
/** A Query that matches documents matching boolean combinations of other
* queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
* BooleanQuerys.
@@ -376,8 +375,8 @@ public class BooleanQuery extends Query
// and fallback to full match-only scorer:
return createMatchOnlyConjunctionTermScorer(context, acceptDocs);
}
- TermDocsEnumFactory factory = new TermDocsEnumFactory(termsEnum, docsAndFreqsEnum, docsAndFreqsEnum, acceptDocs);
- docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docFreq(), docScorer, factory);
+ TermDocsEnumFactory factory = new TermDocsEnumFactory(termsEnum, acceptDocs);
+ docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docFreq(), docScorer, docsAndFreqsEnum, factory);
}
return new ConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
@@ -394,8 +393,8 @@ public class BooleanQuery extends Query
return null;
}
final ExactSimScorer docScorer = weight.createDocScorer(context);
- TermDocsEnumFactory factory = new TermDocsEnumFactory(termsEnum, termsEnum.docs(acceptDocs, null, false), null, acceptDocs);
- docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docFreq(), docScorer, factory);
+ TermDocsEnumFactory factory = new TermDocsEnumFactory(termsEnum, acceptDocs);
+ docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docFreq(), docScorer, termsEnum.docs(acceptDocs, null, false), factory);
}
return new MatchOnlyConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java Sat Jul 14 16:58:12 2012
@@ -105,16 +105,16 @@ class ConjunctionTermScorer extends Scor
}
static final class DocsAndFreqs {
- final DocsEnum docsAndFreqs;
+ //final DocsEnum docsAndFreqs;
final DocsEnum docs;
final int docFreq;
final ExactSimScorer docScorer;
int doc = -1;
private final TermDocsEnumFactory factory;
- DocsAndFreqs( int docFreq, ExactSimScorer docScorer, TermDocsEnumFactory factory) throws IOException {
- this.docsAndFreqs = factory.docsAndFreqsEnum();
- this.docs = factory.docsEnum();
+ DocsAndFreqs( int docFreq, ExactSimScorer docScorer, DocsEnum docs, TermDocsEnumFactory factory) throws IOException {
+ //this.docsAndFreqs = factory.docsAndFreqsEnum();
+ this.docs = docs;
this.docFreq = docFreq;
this.docScorer = docScorer;
this.factory = factory;
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java Sat Jul 14 16:58:12 2012
@@ -206,7 +206,7 @@ public class MultiPhraseQuery extends Qu
// None of the terms are in this reader
return null;
}
- factory = null; // nocommit - what to do here
+ factory = new MultiTermDocsEnumFactory(liveDocs, context, terms, termContexts, termsEnum);
} else {
final Term term = terms[0];
TermState termState = termContexts.get(term).get(context.ord);
@@ -223,8 +223,7 @@ public class MultiPhraseQuery extends Qu
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}
- docFreq = termsEnum.docFreq();
- factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(term.bytes()), termState, termsEnum, postingsEnum, postingsEnum, acceptDocs);
+ factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(term.bytes()), termsEnum, acceptDocs);
}
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, factory, termsEnum.docFreq() , positions.get(pos).intValue(), terms);
@@ -393,6 +392,27 @@ public class MultiPhraseQuery extends Qu
}
return true;
}
+
+ /**
+ * TermDocsEnumFactory for a phrase position that holds several terms: instead of
+ * a single-term enum it hands out a UnionDocsAndPositionsEnum over all of them.
+ */
+ private static final class MultiTermDocsEnumFactory extends TermQuery.TermDocsEnumFactory {
+
+ // captured once in the constructor and only read when a new union enum is built
+ private final AtomicReaderContext context;
+ private final Term[] terms;
+ private final Map<Term, TermContext> termContexts;
+
+ MultiTermDocsEnumFactory(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+ Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
+ super(termsEnum, liveDocs);
+ this.context = context;
+ this.terms = terms;
+ this.termContexts = termContexts;
+ }
+
+ /**
+ * Builds a fresh union enum over all terms of this position.
+ *
+ * @param offsets forwarded to UnionDocsAndPositionsEnum as its needsOffsets flag
+ */
+ @Override
+ public DocsAndPositionsEnum docsAndPositionsEnum(boolean offsets) throws IOException {
+ return new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum, offsets);
+ }
+
+ }
}
/**
@@ -421,25 +441,41 @@ class UnionDocsAndPositionsEnum extends
}
}
- private static final class IntQueue {
- private int _arraySize = 16;
+ // Positions are stored as triples in one flat int[]: position at i * 3,
+ // startOffset at i * 3 + 1 and endOffset at i * 3 + 2. A SorterTemplate
+ // (see sorter below) sorts the triples by position.
+
+ private static final class PositionQueue {
+ private int _arraySize = 48;
private int _index = 0;
private int _lastIndex = 0;
private int[] _array = new int[_arraySize];
- final void add(int i) {
- if (_lastIndex == _arraySize)
+ final void add(int pos, int start, int end) {
+ if (_lastIndex * 3 == _arraySize)
growArray();
- _array[_lastIndex++] = i;
+ _array[_lastIndex * 3] = pos;
+ _array[_lastIndex * 3 + 1] = start;
+ _array[_lastIndex * 3 + 2] = end;
+ _lastIndex += 1;
}
final int next() {
- return _array[_index++];
+ return _array[_index++ * 3];
+ }
+
+ final int startOffset() {
+ return _array[(_index - 1) * 3 + 1];
+ }
+
+ final int endOffset() {
+ return _array[(_index - 1) * 3 + 2];
}
final void sort() {
- Arrays.sort(_array, _index, _lastIndex);
+ //Arrays.sort(_array, _index, _lastIndex);
+ sorter.quickSort(_index, _lastIndex - 1);
}
final void clear() {
@@ -457,14 +493,52 @@ class UnionDocsAndPositionsEnum extends
_array = newArray;
_arraySize *= 2;
}
+
+ // Sorts the (position, startOffset, endOffset) triples held in _array by position.
+ // The indices handed to the callbacks are triple indices, hence the "* 3" everywhere.
+ private final SorterTemplate sorter = new SorterTemplate() {
+ // position VALUE of the pivot triple, captured by setPivot()
+ private int pivot;
+
+ @Override
+ protected void swap(int i, int j) {
+ // exchange all three slots so offsets stay attached to their position
+ int ti = _array[i * 3];
+ int ts = _array[i * 3 + 1];
+ int te = _array[i * 3 + 2];
+ _array[i * 3] = _array[j * 3];
+ _array[i * 3 + 1] = _array[j * 3 + 1];
+ _array[i * 3 + 2] = _array[j * 3 + 2];
+ _array[j * 3] = ti;
+ _array[j * 3 + 1] = ts;
+ _array[j * 3 + 2] = te;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ // NOTE(review): plain subtraction presumes positions are non-negative — confirm
+ return _array[i * 3] - _array[j * 3];
+ }
+
+ @Override
+ protected void setPivot(int i) {
+ // remember the pivot's position, not its index: comparePivot() subtracts a
+ // position from it, and swaps may move the triple that sat at index i
+ pivot = _array[i * 3];
+ }
+
+ @Override
+ protected int comparePivot(int j) {
+ return pivot - _array[j * 3];
+ }
+ };
}
private int _doc;
private int _freq;
private DocsQueue _queue;
- private IntQueue _posList;
+ private PositionQueue _posList;
+
+ public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+ Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
+ this(liveDocs, context, terms, termContexts, termsEnum, false); // delegates to the six-argument constructor with needsOffsets = false
+ }
- public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
+ public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+ Map<Term,TermContext> termContexts, TermsEnum termsEnum, boolean needsOffsets) throws IOException {
List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
for (int i = 0; i < terms.length; i++) {
final Term term = terms[i];
@@ -474,7 +548,7 @@ class UnionDocsAndPositionsEnum extends
continue;
}
termsEnum.seekExact(term.bytes(), termState);
- DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, false);
+ DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
if (postings == null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
@@ -483,7 +557,7 @@ class UnionDocsAndPositionsEnum extends
}
_queue = new DocsQueue(docsEnums);
- _posList = new IntQueue();
+ _posList = new PositionQueue();
}
@Override
@@ -505,7 +579,7 @@ class UnionDocsAndPositionsEnum extends
final int freq = postings.freq();
for (int i = 0; i < freq; i++) {
- _posList.add(postings.nextPosition());
+ _posList.add(postings.nextPosition(), postings.startOffset(), postings.endOffset());
}
if (postings.nextDoc() != NO_MORE_DOCS) {
@@ -528,12 +602,12 @@ class UnionDocsAndPositionsEnum extends
@Override
public int startOffset() {
- return -1;
+ return _posList.startOffset();
}
@Override
public int endOffset() {
- return -1;
+ return _posList.endOffset();
}
@Override
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java Sat Jul 14 16:58:12 2012
@@ -17,8 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Term;
+
import java.io.IOException;
-import org.apache.lucene.index.*;
/**
* Position of a term in a document that takes into account the term offset within the phrase.
@@ -44,6 +46,7 @@ final class PhrasePositions {
final boolean next() throws IOException { // increments to next doc
doc = postings.nextDoc();
+
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
@@ -59,6 +62,7 @@ final class PhrasePositions {
}
final void firstPosition() throws IOException {
+
count = postings.freq(); // read first pos
nextPosition();
}
@@ -80,10 +84,14 @@ final class PhrasePositions {
/** for debug purposes */
@Override
public String toString() {
- String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
+ String s = "d:"+doc+" offset:"+offset+" position:"+position+" c:"+count + " actualPos: " + (position + offset);
if (rptGroup >=0 ) {
s += " rpt:"+rptGroup+",i"+rptInd;
}
+ s += " t: [" + terms[0]; // NOTE(review): assumes terms holds at least one entry — confirm
+ for (int i = 1; i < terms.length; i++) {
+ s += "," + terms[i]; // list every remaining term; terms[1] would repeat the second one
+ }
+ s += "]";
return s;
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java Sat Jul 14 16:58:12 2012
@@ -263,7 +263,7 @@ public class PhraseQuery extends Query {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
}
- TermQuery.TermDocsEnumFactory factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(t.bytes()), state, te, null, null, acceptDocs);
+ TermQuery.TermDocsEnumFactory factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(t.bytes()), te, acceptDocs);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, factory, te.docFreq(), positions.get(i).intValue(), t);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java Sat Jul 14 16:58:12 2012
@@ -17,10 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.search.similarities.Similarity;
+import java.io.IOException;
+
/** Expert: Scoring functionality for phrase queries.
* <br>A document is considered matching if it contains the phrase-query terms
* at "valid" positions. What "valid positions" are
@@ -37,23 +37,31 @@ abstract class PhraseScorer extends Scor
private float freq; //phrase frequency in current doc as computed by phraseFreq().
final Similarity.SloppySimScorer docScorer;
+ protected final PhraseQuery.PostingsAndFreq[] postings;
PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- Similarity.SloppySimScorer docScorer) {
+ Similarity.SloppySimScorer docScorer) throws IOException {
super(weight);
this.docScorer = docScorer;
-
+ this.postings = postings;
+ reset(false);
+ }
+ protected PhrasePositions[] _THEPOS;
+ void reset(boolean needsOffsets) throws IOException {
// convert tps to a list of phrase positions.
// note: phrase-position differs from term-position in that its position
// reflects the phrase offset: pp.pos = tp.pos - offset.
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
if (postings.length > 0) {
+ _THEPOS = new PhrasePositions[postings.length];
min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
+ _THEPOS[0] = min;
max = min;
max.doc = -1;
for (int i = 1; i < postings.length; i++) {
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
+ _THEPOS[i] = pp;
max.next = pp;
max = pp;
max.doc = -1;
@@ -77,7 +85,7 @@ abstract class PhraseScorer extends Scor
return docScorer.score(max.doc, freq);
}
- private boolean advanceMin(int target) throws IOException {
+ protected boolean advanceMin(int target) throws IOException {
if (!min.skipTo(target)) {
max.doc = NO_MORE_DOCS; // for further calls to docID()
return false;
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java Sat Jul 14 16:58:12 2012
@@ -17,14 +17,23 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.positions.ConjunctionPositionIterator;
+import org.apache.lucene.search.positions.MaxLengthPositionIntervalIterator;
import org.apache.lucene.search.positions.PositionIntervalIterator;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.OpenBitSet;
-import java.io.IOException;
-import java.util.*;
-
final class SloppyPhraseScorer extends PhraseScorer {
private final int slop;
@@ -40,12 +49,20 @@ final class SloppyPhraseScorer extends P
private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- int slop, Similarity.SloppySimScorer docScorer) {
+ int slop, Similarity.SloppySimScorer docScorer) throws IOException {
super(weight, postings, docScorer);
this.slop = slop;
this.numPostings = postings==null ? 0 : postings.length;
pq = new PhraseQueue(postings.length);
}
+
+ /** Debug helper: renders every entry of _THEPOS, one per line. */
+ String current() {
+ final StringBuilder out = new StringBuilder();
+ for (int i = 0; i < _THEPOS.length; i++) {
+ out.append(_THEPOS[i]);
+ out.append("\n");
+ }
+ return out.toString();
+ }
/**
* Score a candidate doc for all slop-valid position-combinations (matches)
@@ -74,26 +91,44 @@ final class SloppyPhraseScorer extends P
PhrasePositions pp = pq.pop();
int matchLength = end - pp.position;
int next = pq.top().position;
+
+ int _lPos = pp.position;
+ int _lend = end;
+ String _s = current();
+ Term[] _lTerms = pp.terms;
while (advancePP(pp)) {
if (hasRpts && !advanceRpts(pp)) {
break; // pps exhausted
}
if (pp.position > next) { // done minimizing current match-length
if (matchLength <= slop) {
+ System.out.println(_s);
+ System.out.println("match: " + _lPos + " " + _lend + " " + Arrays.toString(_lTerms));
freq += docScorer.computeSlopFactor(matchLength); // score match
}
pq.add(pp);
pp = pq.pop();
next = pq.top().position;
matchLength = end - pp.position;
+ _lPos = pp.position;
+ _lend = end;
+ _lTerms = pp.terms;
+ _s = current();
} else {
int matchLength2 = end - pp.position;
+
if (matchLength2 < matchLength) {
+ _lPos = pp.position;
+ _lend = end;
+ _lTerms = pp.terms;
+ _s = current();
matchLength = matchLength2;
}
}
}
if (matchLength <= slop) {
+ System.out.println(_s);
+ System.out.println("match: " + _lPos + " " + _lend + " " + Arrays.toString(_lTerms));
freq += docScorer.computeSlopFactor(matchLength); // score match
}
return freq;
@@ -480,30 +515,84 @@ final class SloppyPhraseScorer extends P
}
@Override
- public PositionIntervalIterator positions(boolean needsPayloads, boolean needsOffsets, boolean collectPositions) throws IOException {
- // nocommit implement this (and get a beer before you do so!)
- throw new UnsupportedOperationException();
- }
-
-// private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
-// //if (min.doc != ?) return;
-// ps.println();
-// ps.println("---- "+title);
-// ps.println("EXT: "+ext);
-// PhrasePositions[] t = new PhrasePositions[pq.size()];
-// if (pq.size()>0) {
-// t[0] = pq.pop();
-// ps.println(" " + 0 + " " + t[0]);
-// for (int i=1; i<t.length; i++) {
-// t[i] = pq.pop();
-// assert t[i-1].position <= t[i].position;
-// ps.println(" " + i + " " + t[i]);
-// }
-// // add them back
-// for (int i=t.length-1; i>=0; i--) {
-// pq.add(t[i]);
-// }
-// }
-// }
+ /**
+ * Builds the position iterator for this sloppy phrase: one gap-enforcing term
+ * iterator per posting, combined by a ConjunctionPositionIterator and wrapped
+ * in a MaxLengthPositionIntervalIterator constructed with this scorer's slop.
+ */
+ public PositionIntervalIterator positions(boolean needsPayloads,
+ boolean needsOffsets, boolean collectPositions) throws IOException {
+ // nocommit - payloads?
+ final int numTerms = postings.length;
+ final PositionIntervalIterator[] perTerm = new PositionIntervalIterator[numTerms];
+ // postings that carry the same first term share one MinPosition holder
+ final Map<Term, MinPosition> holders = new HashMap<Term, MinPosition>();
+ for (int idx = 0; idx < numTerms; idx++) {
+ final Term firstTerm = postings[idx].terms[0];
+ /*
+ * NOCOMMIT This currently only works if there is only one term per position.
+ * For multiple terms we need to extend the MaxLengthPI. and specialize
+ * ConjunctionPositionIterator - we should do this anyway.
+ * We can then pull a D&PEnum per term instead of the union and assign the correct
+ * ords to them internally everything else should just work as before
+ */
+ MinPosition holder = holders.get(firstTerm);
+ if (holder == null) {
+ holder = new MinPosition();
+ holders.put(firstTerm, holder);
+ }
+ final DocsAndPositionsEnum postingsEnum = postings[idx].factory.docsAndPositionsEnum(needsOffsets);
+ final PositionIntervalIterator termPositions =
+ new TermScorer.TermPositions(this, postingsEnum, needsPayloads, collectPositions);
+ perTerm[idx] = new GapEnforcingPositionIterator(this, collectPositions, holder, termPositions, 0);
+ }
+ final ConjunctionPositionIterator conjunction =
+ new ConjunctionPositionIterator(this, collectPositions, perTerm);
+ return new MaxLengthPositionIntervalIterator(this, slop, conjunction);
+ }
+ private static final class MinPosition { // mutable holder for a single position value
+ int position = -1; // stays -1 until GapEnforcingPositionIterator.next() stores the begin of an accepted interval
+ }
+
+ /**
+ * Wraps another PositionIntervalIterator and only lets through intervals whose
+ * begin is greater than minPosition.position - delta. The begin of every
+ * accepted interval is written back into the supplied MinPosition holder.
+ */
+ private static class GapEnforcingPositionIterator extends PositionIntervalIterator {
+
+ private final PositionIntervalIterator other;
+ private final MinPosition minPosition;
+ private final int delta;
+
+ public GapEnforcingPositionIterator(Scorer scorer, boolean collectPositions, MinPosition minPosition, PositionIntervalIterator other, int delta) {
+ super(scorer, collectPositions);
+ this.minPosition = minPosition;
+ this.delta = delta;
+ this.other = other;
+ }
+
+ /** Delegates straight to the wrapped iterator. */
+ @Override
+ public int advanceTo(int docId) throws IOException {
+ return other.advanceTo(docId);
+ }
+
+ /**
+ * Pulls intervals from the wrapped iterator until one passes the gap check.
+ *
+ * @return the first accepted interval, or null once the wrapped iterator is exhausted
+ */
+ @Override
+ public PositionInterval next() throws IOException {
+ for (PositionInterval candidate = other.next(); candidate != null; candidate = other.next()) {
+ assert candidate.end == candidate.begin;
+ final int threshold = minPosition.position - delta;
+ if (candidate.begin > threshold) {
+ minPosition.position = candidate.begin;
+ return candidate;
+ }
+ }
+ return null;
+ }
+
+ /** Forwards collection to the wrapped iterator. */
+ @Override
+ public void collect(PositionCollector collector) {
+ assert collectPositions;
+ other.collect(collector);
+ }
+
+ /** Exposes the wrapped iterator's sub-iterators unchanged. */
+ @Override
+ public PositionIntervalIterator[] subs(boolean inOrder) {
+ return other.subs(inOrder);
+ }
+
+ }
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermQuery.java Sat Jul 14 16:58:12 2012
@@ -94,7 +94,7 @@ public class TermQuery extends Query {
}
DocsEnum docs = termsEnum.docs(acceptDocs, null, true);
if (docs != null) {
- return new TermScorer(this, new TermDocsEnumFactory(termsEnum, docs, docs, acceptDocs), createDocScorer(context));
+ return new TermScorer(this, docs, new TermDocsEnumFactory(termsEnum, acceptDocs), createDocScorer(context));
} else {
// Index does not store freq info
docs = termsEnum.docs(acceptDocs, null, false);
@@ -248,43 +248,29 @@ public class TermQuery extends Query {
}
static class TermDocsEnumFactory {
- private final TermsEnum termsEnum;
- private final Bits liveDocs;
- private final DocsEnum docs;
- private final DocsEnum docsAndFreqs;
- private final TermState state;
- private BytesRef term;
+ protected final TermsEnum termsEnum; // protected so subclasses can build their own enums from it
+ protected final Bits liveDocs; // passed as the first argument of docsAndPositions() below
+ protected final BytesRef term; // null when built through the two-argument constructor; no seek is done in that case
- TermDocsEnumFactory(TermsEnum termsEnum, DocsEnum docs, DocsEnum docsAndFreqs, Bits liveDocs) {
- this(null, null, termsEnum, docs, docsAndFreqs, liveDocs);
-
+ TermDocsEnumFactory(TermsEnum termsEnum, Bits liveDocs) {
+ this(null, termsEnum, liveDocs);
}
- TermDocsEnumFactory(BytesRef term, TermState state, TermsEnum termsEnum,
- DocsEnum docs, DocsEnum docsAndFreqs, Bits liveDocs) {
+ TermDocsEnumFactory(BytesRef term, TermsEnum termsEnum, Bits liveDocs) {
this.termsEnum = termsEnum;
this.liveDocs = liveDocs;
- this.docs = docs;
- this.docsAndFreqs = docsAndFreqs;
- this.state = state;
this.term = term;
}
- public DocsEnum docsEnum() throws IOException {
- return docs;
- }
public DocsAndPositionsEnum docsAndPositionsEnum(boolean offsets)
throws IOException {
- if (state != null) {
+ if (term != null) { // the assert on the next line is redundant now that this guard tests term itself
assert term != null;
- termsEnum.seekExact(term, state);
+ termsEnum.seekExact(term, false); // NOTE(review): the result of this call is discarded — confirm the term is always present in this segment
}
return termsEnum.docsAndPositions(liveDocs, null, offsets);
}
-
- public DocsEnum docsAndFreqsEnum() throws IOException{
- return docsAndFreqs;
- }
+
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermScorer.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermScorer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/TermScorer.java Sat Jul 14 16:58:12 2012
@@ -45,10 +45,10 @@ final class TermScorer extends Scorer {
* The </code>Similarity.ExactSimScorer</code> implementation
* to be used for score computations.
*/
- TermScorer(Weight weight, TermDocsEnumFactory factory, Similarity.ExactSimScorer docScorer) throws IOException {
+ TermScorer(Weight weight, DocsEnum docsEnum, TermDocsEnumFactory factory, Similarity.ExactSimScorer docScorer) throws IOException {
super(weight);
this.docScorer = docScorer;
- this.docsEnum = factory.docsAndFreqsEnum();
+ this.docsEnum = docsEnum;
this.factory = factory;
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/ConjunctionPositionIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/ConjunctionPositionIterator.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/ConjunctionPositionIterator.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/ConjunctionPositionIterator.java Sat Jul 14 16:58:12 2012
@@ -1,4 +1,5 @@
package org.apache.lucene.search.positions;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -29,30 +30,37 @@ import org.apache.lucene.util.RamUsageEs
* <a href=
* "http://vigna.dsi.unimi.it/ftp/papers/EfficientAlgorithmsMinimalIntervalSemantics"
* >"Efficient Optimally Lazy Algorithms for Minimal-Interval Semantic</a>
+ *
* @lucene.experimental
- */ // nocommit - javadoc
+ */
+// nocommit - javadoc
public final class ConjunctionPositionIterator extends BooleanPositionIterator {
private final IntervalQueueAnd queue;
private final int nrMustMatch;
private SnapshotPositionCollector snapshot;
+ private int matchDistance; // nocommit specialize for this or better move that
+ // out of this class entirely
- public ConjunctionPositionIterator(Scorer scorer, boolean collectPositions, PositionIntervalIterator... iterators) throws IOException {
+ public ConjunctionPositionIterator(Scorer scorer, boolean collectPositions,
+ PositionIntervalIterator... iterators) throws IOException {
this(scorer, collectPositions, iterators.length, iterators);
}
- public ConjunctionPositionIterator(Scorer scorer, boolean collectPositions, int minimuNumShouldMatch, PositionIntervalIterator... iterators) throws IOException {
- super(scorer, iterators, new IntervalQueueAnd(iterators.length), collectPositions);
+ public ConjunctionPositionIterator(Scorer scorer, boolean collectPositions,
+ int minimuNumShouldMatch, PositionIntervalIterator... iterators)
+ throws IOException {
+ super(scorer, iterators, new IntervalQueueAnd(iterators.length),
+ collectPositions);
queue = (IntervalQueueAnd) super.queue; // avoid lots of casts?
this.nrMustMatch = minimuNumShouldMatch;
-
}
-
+
void advance() throws IOException {
final IntervalRef top = queue.top();
PositionInterval interval = null;
- if ((interval = iterators[top.index].next()) != null) {
+ if ((interval = iterators[top.ord].next()) != null) {
top.interval = interval;
- queue.updateRightExtreme(interval);
+ queue.updateRightExtreme(top);
queue.updateTop();
} else {
queue.pop();
@@ -62,21 +70,28 @@ public final class ConjunctionPositionIt
@Override
public PositionInterval next() throws IOException {
- while (queue.size() >= nrMustMatch && queue.top().interval.begin == queue.currentCandidate.begin) {
+ while (queue.size() >= nrMustMatch
+ && queue.top().interval.begin == queue.currentCandidate.begin) {
advance();
}
if (queue.size() < nrMustMatch) {
return null;
}
-
do {
queue.updateCurrentCandidate();
- PositionInterval top = queue.top().interval;
- if(queue.currentCandidate.begin == top.begin && queue.currentCandidate.end == top.end) {
+ PositionInterval top = updateMatchDistance(queue.top()); // nocommit this
+ // should be in a
+ // specialized
+ // class - used
+ // for scoring in
+ // sloppy phrase
+ if (queue.currentCandidate.begin == top.begin
+ && queue.currentCandidate.end == top.end) {
return queue.currentCandidate;
}
if (collectPositions) {
- snapShotSubPositions(); // oddity! see SnapShotCollector below for details!
+ snapShotSubPositions(); // this looks odd? -> see SnapshotPositionCollector
+ // below for details!
}
advance();
if (queue.size() < nrMustMatch) {
@@ -85,7 +100,32 @@ public final class ConjunctionPositionIt
} while (queue.topContainsQueueInterval());
return queue.currentCandidate; // TODO support payloads
}
-
+
+ private final PositionInterval updateMatchDistance(IntervalRef top) {
+ final int end = queue.rightExtreme - queue.rightExtremeOrd;
+ final int head = (top.interval.begin - top.ord);
+ matchDistance = end - head;
+ return top.interval;
+ }
+
+ @Override
+ public int advanceTo(int docId) throws IOException {
+ queue.reset();
+ int advancedTo = -1;
+ for (int i = 0; i < iterators.length; i++) {
+ currentDoc = iterators[i].advanceTo(docId);
+ assert advancedTo == -1 || advancedTo == currentDoc;
+
+ final PositionInterval interval = iterators[i].next();
+ if (interval != null) {
+ IntervalRef intervalRef = new IntervalRef(interval, i);
+ queue.updateRightExtreme(intervalRef);
+ queue.add(intervalRef);
+ }
+ }
+ return currentDoc;
+ }
+
private void snapShotSubPositions() {
if (snapshot == null) {
snapshot = new SnapshotPositionCollector(queue.size());
@@ -100,44 +140,34 @@ public final class ConjunctionPositionIt
for (PositionIntervalIterator iter : iterators) {
iter.collect(collector);
}
+
}
-
+
@Override
public void collect(PositionCollector collector) {
assert collectPositions;
- if(snapshot==null) {
+ if (snapshot == null) {
// we might not be initialized if the first interval matches
collectInternal(collector);
- } else {
+ } else {
snapshot.replay(collector);
}
}
-
- @Override
- public int advanceTo(int docId) throws IOException {
- queue.reset();
- int advancedTo = -1;
- for (int i = 0; i < iterators.length; i++) {
- currentDoc = iterators[i].advanceTo(docId);
- assert advancedTo == -1 || advancedTo == currentDoc;
- final PositionInterval interval = iterators[i].next();
- if (interval != null) {
- queue.updateRightExtreme(interval);
- queue.add(new IntervalRef(interval, i));
- }
- }
- return currentDoc;
+
+ int matchDistance() { // nocommit move out!
+ return matchDistance;
}
-
-
+
/*
* Due to the laziness of this position iterator and the minimizing algorithm
* we advance the underlying iterators before the consumer can call collect on
- * the top level iterator. If we need to collect positions we need to record the
- * last possible match in order to allow the consumer to get the right positions
- * for the match. This is particularly important if leaf positions are required.
+ * the top level iterator. If we need to collect positions we need to record
+ * the last possible match in order to allow the consumer to get the right
+ * positions for the match. This is particularly important if leaf positions
+ * are required.
*/
- private static final class SnapshotPositionCollector implements PositionCollector {
+ private static final class SnapshotPositionCollector implements
+ PositionCollector {
private SingleSnapshot[] snapshots;
private int index = 0;
@@ -151,8 +181,9 @@ public final class ConjunctionPositionIt
collect(scorer, interval, docID, true);
}
-
- private void collect(Scorer scorer, PositionInterval interval, int docID, boolean isLeaf) {
+
+ private void collect(Scorer scorer, PositionInterval interval, int docID,
+ boolean isLeaf) {
if (snapshots.length <= index) {
grow(ArrayUtil.oversize(index + 1,
(RamUsageEstimator.NUM_BYTES_OBJECT_REF * 2)
@@ -165,7 +196,7 @@ public final class ConjunctionPositionIt
}
snapshots[index++].set(scorer, interval, isLeaf, docID);
}
-
+
@Override
public void collectComposite(Scorer scorer, PositionInterval interval,
int docID) {
@@ -176,9 +207,11 @@ public final class ConjunctionPositionIt
for (int i = 0; i < index; i++) {
SingleSnapshot singleSnapshot = snapshots[i];
if (singleSnapshot.isLeaf) {
- collector.collectLeafPosition(singleSnapshot.scorer, singleSnapshot.interval, singleSnapshot.docID);
+ collector.collectLeafPosition(singleSnapshot.scorer,
+ singleSnapshot.interval, singleSnapshot.docID);
} else {
- collector.collectComposite(singleSnapshot.scorer, singleSnapshot.interval, singleSnapshot.docID);
+ collector.collectComposite(singleSnapshot.scorer,
+ singleSnapshot.interval, singleSnapshot.docID);
}
}
}
@@ -198,17 +231,14 @@ public final class ConjunctionPositionIt
final PositionInterval interval = new PositionInterval();
boolean isLeaf;
int docID;
-
void set(Scorer scorer, PositionInterval interval, boolean isLeaf,
- int docID) {
+ int docID) {
this.scorer = scorer;
this.interval.copy(interval);
this.isLeaf = isLeaf;
this.docID = docID;
}
-
-
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/DisjunctionPositionIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/DisjunctionPositionIterator.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/DisjunctionPositionIterator.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/DisjunctionPositionIterator.java Sat Jul 14 16:58:12 2012
@@ -43,7 +43,7 @@ public final class DisjunctionPositionIt
void advance() throws IOException {
final IntervalRef top = queue.top();
PositionInterval interval = null;
- if ((interval = iterators[top.index].next()) != null) {
+ if ((interval = iterators[top.ord].next()) != null) {
top.interval = interval;
queue.updateTop();
} else {
@@ -72,7 +72,7 @@ public final class DisjunctionPositionIt
public void collect(PositionCollector collector) {
assert collectPositions;
collector.collectComposite(scorer, queue.currentCandidate, currentDoc);
- iterators[queue.top().index].collect(collector);
+ iterators[queue.top().ord].collect(collector);
}
@Override
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueue.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueue.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueue.java Sat Jul 14 16:58:12 2012
@@ -43,12 +43,12 @@ abstract class IntervalQueue extends Pri
final static class IntervalRef {
PositionInterval interval;
- int index;
+ int ord; // the ordinal of this ref in the ordered case
IntervalRef(PositionInterval interval, int index) {
super();
this.interval = interval;
- this.index = index;
+ this.ord = index;
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueueAnd.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueueAnd.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueueAnd.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/IntervalQueueAnd.java Sat Jul 14 16:58:12 2012
@@ -25,6 +25,7 @@ final class IntervalQueueAnd extends Int
int rightExtreme = Integer.MIN_VALUE;
int rightExtremeOffset = Integer.MIN_VALUE;
+ int rightExtremeOrd = Integer.MIN_VALUE; // the ord of the queue's right extreme - ordered case!
public IntervalQueueAnd(int size) {
super(size);
@@ -36,11 +37,16 @@ final class IntervalQueueAnd extends Int
currentCandidate.end = Integer.MIN_VALUE;
rightExtreme = Integer.MIN_VALUE;
rightExtremeOffset = Integer.MIN_VALUE;
+ rightExtremeOrd = Integer.MIN_VALUE;
}
- public void updateRightExtreme(PositionInterval interval) {
- rightExtreme = Math.max(rightExtreme, interval.end);
- rightExtremeOffset = Math.max(rightExtremeOffset, interval.offsetEnd);
+ public void updateRightExtreme(IntervalRef ref) {
+ if (rightExtreme < ref.interval.end) {
+ rightExtreme = ref.interval.end;
+ rightExtremeOrd = ref.ord;
+ }
+
+ rightExtremeOffset = Math.max(rightExtremeOffset, ref.interval.offsetEnd);
}
public boolean topContainsQueueInterval() {
Added: lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/MaxLengthPositionIntervalIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/MaxLengthPositionIntervalIterator.java?rev=1361560&view=auto
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/MaxLengthPositionIntervalIterator.java (added)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/java/org/apache/lucene/search/positions/MaxLengthPositionIntervalIterator.java Sat Jul 14 16:58:12 2012
@@ -0,0 +1,72 @@
+package org.apache.lucene.search.positions;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.search.Scorer;
+
+
+
+/**
+ * An interval iterator that has the semantics of a sloppy phrase query.
+ */
+public class MaxLengthPositionIntervalIterator extends PositionIntervalIterator {
+
+ private final int maxLen;
+ private ConjunctionPositionIterator iter;
+
+ public MaxLengthPositionIntervalIterator(Scorer scorer, int maxLength,
+ ConjunctionPositionIterator iter) throws IOException {
+ super(scorer, iter.collectPositions);
+ this.maxLen = maxLength;
+ this.iter = iter;
+ }
+
+ @Override
+ public int advanceTo(int docId) throws IOException {
+ return iter.advanceTo(docId);
+ }
+
+ @Override
+ public PositionInterval next() throws IOException {
+ PositionInterval current;
+ do {
+ current = iter.next();
+ if (current == null) {
+ break;
+ }
+ //NOCOMMIT this is an impl detail of ConjunctionPositionIterator that should reside somewhere else
+ // maybe specialize for this?
+ if (iter.matchDistance() <= maxLen) {
+ break;
+ }
+ } while(true);
+ return current;
+ }
+
+ @Override
+ public void collect(PositionCollector collector) {
+ assert collectPositions;
+ iter.collect(collector);
+ }
+
+ @Override
+ public PositionIntervalIterator[] subs(boolean inOrder) {
+ return iter.subs(inOrder);
+ }
+
+}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java Sat Jul 14 16:58:12 2012
@@ -193,7 +193,7 @@ final class JustCompileSearch {
static final class JustCompilePhraseScorer extends PhraseScorer {
JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- Similarity.SloppySimScorer docScorer) {
+ Similarity.SloppySimScorer docScorer) throws IOException {
super(weight, postings, docScorer);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/positions/TestPositionOffsets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/positions/TestPositionOffsets.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/positions/TestPositionOffsets.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/core/src/test/org/apache/lucene/search/positions/TestPositionOffsets.java Sat Jul 14 16:58:12 2012
@@ -28,24 +28,14 @@ import org.apache.lucene.codecs.pulsing.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.Weight;
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import java.io.IOException;
+
public class TestPositionOffsets extends LuceneTestCase {
// What am I testing here?
@@ -93,15 +83,18 @@ public class TestPositionOffsets extends
writer.addDocument(doc);
}
- public void testTermQueryWithOffsets() throws IOException {
+ private void testQuery(Query query, int[][] expectedOffsets) throws IOException {
+ testQuery(query, expectedOffsets, true);
+ }
+
+ private void testQuery(Query query, int[][] expectedOffsets, boolean needsOffsets) throws IOException {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
- addDocs(writer, true);
+ addDocs(writer, needsOffsets);
IndexReader reader = writer.getReader();
IndexSearcher searcher = new IndexSearcher(reader);
writer.close();
- Query query = new TermQuery(new Term("field", "porridge"));
Weight weight = query.createWeight(searcher);
IndexReaderContext topReaderContext = searcher.getTopReaderContext();
@@ -112,9 +105,9 @@ public class TestPositionOffsets extends
int nextDoc = scorer.nextDoc();
assertEquals(0, nextDoc);
- PositionIntervalIterator positions = scorer.positions(false, true, false);
- int[] startOffsets = new int[] { 6, 26, 47, 164, 184 };
- int[] endOffsets = new int[] { 14, 34, 55, 172, 192 };
+ PositionIntervalIterator positions = scorer.positions(false, needsOffsets, false);
+ int startOffsets[] = expectedOffsets[0];
+ int endOffsets[] = expectedOffsets[1];
assertEquals(0, positions.advanceTo(nextDoc));
for (int i = 0; i < startOffsets.length; i++) {
@@ -130,78 +123,63 @@ public class TestPositionOffsets extends
}
public void testTermQueryWithoutOffsets() throws IOException {
- Directory directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
- addDocs(writer, false);
-
- IndexReader reader = writer.getReader();
- IndexSearcher searcher = new IndexSearcher(reader);
- writer.close();
Query query = new TermQuery(new Term("field", "porridge"));
-
- Weight weight = query.createWeight(searcher);
- IndexReaderContext topReaderContext = searcher.getTopReaderContext();
- List<AtomicReaderContext> leaves = topReaderContext.leaves();
- assertEquals(1, leaves.size());
- Scorer scorer = weight.scorer(leaves.get(0),
- true, true, leaves.get(0).reader().getLiveDocs());
-
- int nextDoc = scorer.nextDoc();
- assertEquals(0, nextDoc);
- PositionIntervalIterator positions = scorer.positions(false, false, false);
- int[] startOffsets = new int[] { -1, -1, -1, -1, -1 };
- int[] endOffsets = new int[] { -1, -1, -1, -1, -1 };
-
- assertEquals(0, positions.advanceTo(nextDoc));
- for (int i = 0; i < startOffsets.length; i++) {
- PositionIntervalIterator.PositionInterval interval = positions.next();
- assertEquals(startOffsets[i], interval.offsetBegin);
- assertEquals(endOffsets[i], interval.offsetEnd);
- }
-
- assertNull(positions.next());
-
- reader.close();
- directory.close();
+ int[] startOffsets = new int[] { 6, 26, 47, 164, 184 };
+ int[] endOffsets = new int[] { 14, 34, 55, 172, 192 };
+ testQuery(query, new int[][] { startOffsets, endOffsets });
}
public void testBooleanQueryWithOffsets() throws IOException {
- Directory directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
- addDocs(writer, true);
-
- IndexReader reader = writer.getReader();
- IndexSearcher searcher = new IndexSearcher(reader);
- writer.close();
+
BooleanQuery query = new BooleanQuery();
- query.add(new BooleanClause(new TermQuery(new Term("field", "porridge")), BooleanClause.Occur.MUST));
- query.add(new BooleanClause(new TermQuery(new Term("field", "nine")), BooleanClause.Occur.MUST));
-
- Weight weight = query.createWeight(searcher);
- IndexReaderContext topReaderContext = searcher.getTopReaderContext();
- List<AtomicReaderContext> leaves = topReaderContext.leaves();
- assertEquals(1, leaves.size());
- Scorer scorer = weight.scorer(leaves.get(0),
- true, true, leaves.get(0).reader().getLiveDocs());
-
- int nextDoc = scorer.nextDoc();
- assertEquals(0, nextDoc);
- PositionIntervalIterator positions = scorer.positions(false, true, false);
- int[] startOffsetsConj = new int[] { 6, 26, 47, 67, 143};
- int[] endOffsetsConj = new int[] { 71, 71, 71, 172, 172};
- assertEquals(0, positions.advanceTo(nextDoc));
- PositionIntervalIterator.PositionInterval interval;
- int i = 0;
- while((interval = positions.next()) != null) {
- assertEquals(startOffsetsConj[i], interval.offsetBegin);
- assertEquals(endOffsetsConj[i], interval.offsetEnd);
- i++;
- }
- assertEquals(i, startOffsetsConj.length);
- assertNull(positions.next());
-
- reader.close();
- directory.close();
+ query.add(new BooleanClause(new TermQuery(new Term("field", "porridge")),
+ BooleanClause.Occur.MUST));
+ query.add(new BooleanClause(new TermQuery(new Term("field", "nine")),
+ BooleanClause.Occur.MUST));
+ int[] startOffsetsConj = new int[] {6, 26, 47, 67, 143};
+ int[] endOffsetsConj = new int[] {71, 71, 71, 172, 172};
+ testQuery(query, new int[][] {startOffsetsConj, endOffsetsConj});
+ }
+
+ public void testExactPhraseQuery() throws IOException {
+ PhraseQuery query = new PhraseQuery();
+ query.add(new Term("field", "pease"));
+ query.add(new Term("field", "porridge"));
+ query.add(new Term("field", "hot!"));
+ int[] startOffsetsBlock = new int[] {0, 158};
+ int[] endOffsetsBlock = new int[] {19, 177};
+ testQuery(query, new int[][] {startOffsetsBlock, endOffsetsBlock});
+ }
+
+ public void testSloppyPhraseQuery() throws IOException {
+ PhraseQuery query = new PhraseQuery();
+ query.add(new Term("field", "pease"));
+ query.add(new Term("field", "hot!"));
+ query.setSlop(1);
+ int[] startOffsetsBlock = new int[] {0, 158};
+ int[] endOffsetsBlock = new int[] {19, 177};
+ testQuery(query, new int[][] {startOffsetsBlock, endOffsetsBlock});
}
+ public void testManyTermSloppyPhraseQuery() throws IOException {
+ PhraseQuery query = new PhraseQuery();
+ query.add(new Term("field", "pease"));
+ query.add(new Term("field", "porridge"));
+ query.add(new Term("field", "pot"));
+ query.setSlop(2);
+ int[] startOffsetsBlock = new int[] {41};
+ int[] endOffsetsBlock = new int[] {66};
+ testQuery(query, new int[][] {startOffsetsBlock, endOffsetsBlock});
+ }
+
+ public void testMultiTermPhraseQuery() throws IOException {
+ MultiPhraseQuery query = new MultiPhraseQuery();
+ query.add(new Term("field", "pease"));
+ query.add(new Term("field", "porridge"));
+ query
+ .add(new Term[] {new Term("field", "hot!"), new Term("field", "cold!")});
+ int[] startOffsetsBlock = new int[] {0, 20, 158, 178};
+ int[] endOffsetsBlock = new int[] {19, 40, 177, 198};
+ testQuery(query, new int[][] {startOffsetsBlock, endOffsetsBlock});
+ }
}
\ No newline at end of file
Modified: lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java?rev=1361560&r1=1361559&r2=1361560&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java Sat Jul 14 16:58:12 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.search.poshigh
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.Codec;
@@ -48,6 +49,7 @@ import org.apache.lucene.util.LuceneTest
import org.apache.lucene.util._TestUtil;
import java.io.IOException;
+import java.io.StringReader;
/**
* TODO: FIX THIS TEST Phrase and Span Queries positions callback API
@@ -146,8 +148,10 @@ public class PosHighlighterTest extends
InvalidTokenOffsetsException {
return doSearch(q, maxFragSize, 0);
}
-
- private String[] doSearch(Query q, int maxFragSize, int docIndex)
+ private String[] doSearch(Query q, int maxFragSize, int docIndex) throws IOException, InvalidTokenOffsetsException {
+ return doSearch(q, maxFragSize, docIndex, false);
+ }
+ private String[] doSearch(Query q, int maxFragSize, int docIndex, boolean analyze)
throws IOException, InvalidTokenOffsetsException {
// ConstantScorer is a fragment Scorer, not a search result (document)
// Scorer
@@ -165,10 +169,18 @@ public class PosHighlighterTest extends
// FIXME: test error cases: for non-stored fields, and fields w/no term
// vectors
// searcher.getIndexReader().getTermFreqVector(doc.doc, F, pom);
-
+ final TokenStream stream;
+ if (analyze) {
+ stream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
+ MockTokenFilter.EMPTY_STOPSET, true).tokenStream(F,
+ new StringReader(text));
+ } else {
+ stream = new PosTokenStream(text, new PositionIntervalArrayIterator(
+ doc.sortedPositions(), doc.posCount));
+ }
+ //
TextFragment[] fragTexts = highlighter.getBestTextFragments(
- new PosTokenStream(text, new PositionIntervalArrayIterator(doc
- .sortedPositions(), doc.posCount)), text, false, 10);
+ stream , text, false, 10);
String[] frags = new String[fragTexts.length];
for (int i = 0; i < frags.length; i++)
frags[i] = fragTexts[i].toString();
@@ -368,6 +380,36 @@ public class PosHighlighterTest extends
frags[0]);
close();
}
+
+ public void testSloppyPhraseQuery() throws Exception {
+ assertSloppyPhrase( "a b c d a b c d e f", "a b <B>c</B> d <B>a</B> b c d e f", 2, "c", "a");
+ assertSloppyPhrase( "a c e b d e f a b","<B>a</B> c e <B>b</B> d e f <B>a</B> <B>b</B>", 2, "a", "b");
+ assertSloppyPhrase( "X A X B A","<B>X</B> <B>A</B> <B>X</B> B <B>A</B>", 2, "X", "A", "A");
+ assertSloppyPhrase( "A A X A X B A X B B A A X B A A","A A <B>X</B> <B>A</B> <B>X</B> B <B>A</B> <B>X</B> B B <B>A</B> <B>A</B> <B>X</B> B <B>A</B> <B>A</B>", 2, "X", "A", "A");
+ assertSloppyPhrase( "A A X A X B A X B B A A X B A A", "A A <B>X</B> <B>A</B> <B>X</B> B <B>A</B> <B>X</B> B B <B>A</B> <B>A</B> <B>X</B> B <B>A</B> <B>A</B>", 2, "X", "A", "A");
+ assertSloppyPhrase( "A A X A X B A", "A A <B>X</B> <B>A</B> <B>X</B> B <B>A</B>", 2, "X", "A", "A");
+ assertSloppyPhrase( "A A Y A X B A", "A A Y <B>A</B> <B>X</B> B <B>A</B>", 2, "X", "A", "A");
+ assertSloppyPhrase( "A A Y A X B A A", "A A Y <B>A</B> <B>X</B> B <B>A</B> <B>A</B>", 2, "X", "A", "A");
+ assertSloppyPhrase( "A A X A Y B A", "A A <B>X</B> <B>A</B> Y B <B>A</B>", 2, "X", "A", "A");
+ assertSloppyPhrase( "A A X A Y B A", null , 1, "X", "A", "A");
+ close();
+ }
+
+ private void assertSloppyPhrase(String doc, String expected, int slop, String...query) throws Exception {
+ insertDocs(analyzer, doc);
+ PhraseQuery pq = new PhraseQuery();
+ for (String string : query) {
+ pq.add(new Term(F, string));
+ }
+
+ pq.setSlop(slop);
+ String[] frags = doSearch(pq, 50);
+ if (expected == null) {
+ assertNull(frags);
+ } else {
+ assertEquals(expected, frags[0]);
+ }
+ }
public static class BlockPositionIteratorFilter implements PositionIntervalFilter {