You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/09/06 12:52:48 UTC
svn commit: r1520536 - in /lucene/dev/trunk/lucene: ./
highlighter/src/java/org/apache/lucene/search/vectorhighlight/
highlighter/src/test/org/apache/lucene/search/vectorhighlight/
Author: jpountz
Date: Fri Sep 6 10:52:47 2013
New Revision: 1520536
URL: http://svn.apache.org/r1520536
Log:
Revert LUCENE-4734.
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1520536&r1=1520535&r2=1520536&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Sep 6 10:52:47 2013
@@ -112,9 +112,6 @@ Bug Fixes
* LUCENE-5116: IndexWriter.addIndexes(IndexReader...) should drop empty (or all
deleted) segments. (Robert Muir, Shai Erera)
-* LUCENE-4734: Add FastVectorHighlighter support for proximity queries and
- phrase queries with gaps or overlapping terms. (Ryan Lauck, Adrien Grand)
-
* LUCENE-5132: Spatial RecursivePrefixTree Contains predicate will throw an NPE
when there's no indexed data and maybe in other circumstances too. (David Smiley)
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=1520536&r1=1520535&r2=1520536&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Fri Sep 6 10:52:47 2013
@@ -60,50 +60,50 @@ public class FieldPhraseList {
public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit ){
final String field = fieldTermStack.getFieldName();
- QueryPhraseMap qpm = fieldQuery.getRootMap(field);
- if (qpm != null) {
- LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
- extractPhrases(fieldTermStack.termList, qpm, phraseCandidate, 0);
- assert phraseCandidate.size() == 0;
- }
- }
+ LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
+ QueryPhraseMap currMap = null;
+ QueryPhraseMap nextMap = null;
+ while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) )
+ {
+ phraseCandidate.clear();
+
+ TermInfo ti = fieldTermStack.pop();
+ currMap = fieldQuery.getFieldTermMap( field, ti.getText() );
- void extractPhrases(LinkedList<TermInfo> terms, QueryPhraseMap currMap, LinkedList<TermInfo> phraseCandidate, int longest) {
- if (phraseCandidate.size() > 1 && phraseCandidate.getLast().getPosition() - phraseCandidate.getFirst().getPosition() > currMap.getMaxPhraseWindow()) {
- return;
- }
- if (terms.isEmpty()) {
- if (longest > 0) {
- addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate.subList(0, longest), currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
- }
- return;
- }
- ArrayList<TermInfo> samePositionTerms = new ArrayList<TermInfo>();
- do {
- samePositionTerms.add(terms.pop());
- } while (!terms.isEmpty() && terms.get(0).getPosition() == samePositionTerms.get(0).getPosition());
-
- // try all next terms at the same position
- for (TermInfo nextTerm : samePositionTerms) {
- QueryPhraseMap nextMap = currMap.getTermMap(nextTerm.getText());
- if (nextMap != null) {
- phraseCandidate.add(nextTerm);
- int l = longest;
- if(nextMap.isValidTermOrPhrase( phraseCandidate ) ){
- l = phraseCandidate.size();
+ // if not found, discard top TermInfo from stack, then try next element
+ if( currMap == null ) continue;
+
+ // if found, search the longest phrase
+ phraseCandidate.add( ti );
+ while( true ){
+ ti = fieldTermStack.pop();
+ nextMap = null;
+ if( ti != null )
+ nextMap = currMap.getTermMap( ti.getText() );
+ if( ti == null || nextMap == null ){
+ if( ti != null )
+ fieldTermStack.push( ti );
+ if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
+ addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
+ }
+ else{
+ while( phraseCandidate.size() > 1 ){
+ fieldTermStack.push( phraseCandidate.removeLast() );
+ currMap = fieldQuery.searchPhrase( field, phraseCandidate );
+ if( currMap != null ){
+ addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
+ break;
+ }
+ }
+ }
+ break;
+ }
+ else{
+ phraseCandidate.add( ti );
+ currMap = nextMap;
}
- extractPhrases(terms, nextMap, phraseCandidate, l);
- phraseCandidate.removeLast();
}
}
-
- // ignore the next term
- extractPhrases(terms, currMap, phraseCandidate, longest);
-
- // add terms back
- for (TermInfo nextTerm : samePositionTerms) {
- terms.push(nextTerm);
- }
}
public void addIfNoOverlap( WeightedPhraseInfo wpi ){
@@ -159,11 +159,11 @@ public class FieldPhraseList {
return termsInfos;
}
- public WeightedPhraseInfo( List<TermInfo> terms, float boost ){
+ public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
this( terms, boost, 0 );
}
- public WeightedPhraseInfo( List<TermInfo> terms, float boost, int seqnum ){
+ public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
this.boost = boost;
this.seqnum = seqnum;
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1520536&r1=1520535&r2=1520536&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Fri Sep 6 10:52:47 2013
@@ -17,8 +17,6 @@ package org.apache.lucene.search.vectorh
*/
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
@@ -30,6 +28,7 @@ import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
@@ -40,7 +39,6 @@ import org.apache.lucene.search.PhraseQu
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
-import org.apache.lucene.util.InPlaceMergeSorter;
/**
* FieldQuery breaks down query object into terms/phrases and keeps
@@ -62,8 +60,6 @@ public class FieldQuery {
// The maximum number of different matching terms accumulated from any one MultiTermQuery
private static final int MAX_MTQ_TERMS = 1024;
-
- private int maxPhraseWindow = 1;
FieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
this.fieldMatch = fieldMatch;
@@ -334,8 +330,7 @@ public class FieldQuery {
return root.searchPhrase( phraseCandidate );
}
- /** Get the root map for the given field name. */
- public QueryPhraseMap getRootMap( String fieldName ){
+ private QueryPhraseMap getRootMap( String fieldName ){
return rootMaps.get( fieldMatch ? fieldName : null );
}
@@ -352,7 +347,6 @@ public class FieldQuery {
boolean terminal;
int slop; // valid if terminal == true and phraseHighlight == true
float boost; // valid if terminal == true
- int[] positions; // valid if terminal == true
int termOrPhraseNumber; // valid if terminal == true
FieldQuery fieldQuery;
Map<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
@@ -375,117 +369,38 @@ public class FieldQuery {
return map;
}
- void add( Query query, IndexReader reader ) {
+ void add( Query query, IndexReader reader ) {
if( query instanceof TermQuery ){
addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
}
else if( query instanceof PhraseQuery ){
PhraseQuery pq = (PhraseQuery)query;
- final Term[] terms = pq.getTerms();
- final int[] positions = pq.getPositions();
- new InPlaceMergeSorter() {
-
- @Override
- protected void swap(int i, int j) {
- Term tmpTerm = terms[i];
- terms[i] = terms[j];
- terms[j] = tmpTerm;
-
- int tmpPos = positions[i];
- positions[i] = positions[j];
- positions[j] = tmpPos;
- }
-
- @Override
- protected int compare(int i, int j) {
- return positions[i] - positions[j];
- }
- }.sort(0, terms.length);
-
- addToMap(pq, terms, positions, 0, subMap, pq.getSlop());
+ Term[] terms = pq.getTerms();
+ Map<String, QueryPhraseMap> map = subMap;
+ QueryPhraseMap qpm = null;
+ for( Term term : terms ){
+ qpm = getOrNewMap( map, term.text() );
+ map = qpm.subMap;
+ }
+ qpm.markTerminal( pq.getSlop(), pq.getBoost() );
}
else
throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
}
-
- private int numTermsAtSamePosition(int[] positions, int i) {
- int numTermsAtSamePosition = 1;
- for (int j = i + 1; j < positions.length; ++j) {
- if (positions[j] == positions[i]) {
- ++numTermsAtSamePosition;
- }
- }
- return numTermsAtSamePosition;
- }
-
- private void addToMap(PhraseQuery pq, Term[] terms, int[] positions, int i, Map<String, QueryPhraseMap> map, int slop) {
- int numTermsAtSamePosition = numTermsAtSamePosition(positions, i);
- for (int j = 0; j < numTermsAtSamePosition; ++j) {
- QueryPhraseMap qpm = getOrNewMap(map, terms[i + j].text());
- if (i + numTermsAtSamePosition == terms.length) {
- qpm.markTerminal(pq.getSlop(), pq.getBoost(), uniquePositions(positions));
- } else {
- addToMap(pq, terms, positions, i + numTermsAtSamePosition, qpm.subMap, slop);
- }
- }
- if (slop > 2 && i + numTermsAtSamePosition < terms.length) {
- Term[] otherTerms = Arrays.copyOf(terms, terms.length);
- int[] otherPositions = Arrays.copyOf(positions, positions.length);
- final int nextTermAtSamePosition = numTermsAtSamePosition(positions, i + numTermsAtSamePosition);
- System.arraycopy(terms, i + numTermsAtSamePosition, otherTerms, i, nextTermAtSamePosition);
- System.arraycopy(positions, i + numTermsAtSamePosition, otherPositions, i, nextTermAtSamePosition);
- System.arraycopy(terms, i, otherTerms, i + nextTermAtSamePosition, numTermsAtSamePosition);
- System.arraycopy(positions, i, otherPositions, i + nextTermAtSamePosition, numTermsAtSamePosition);
- addToMap(pq, otherTerms, otherPositions, i, map, slop - 2);
- }
- }
-
- private int[] uniquePositions(int[] positions) {
- int uniqueCount = 1;
- for (int i = 1; i < positions.length; ++i) {
- if (positions[i] != positions[i - 1]) {
- ++uniqueCount;
- }
- }
- if (uniqueCount == positions.length) {
- return positions;
- }
- int[] result = new int[uniqueCount];
- result[0] = positions[0];
- for (int i = 1, j = 1; i < positions.length; ++i) {
- if (positions[i] != positions[i - 1]) {
- result[j++] = positions[i];
- }
- }
- return result;
- }
-
+
public QueryPhraseMap getTermMap( String term ){
return subMap.get( term );
}
private void markTerminal( float boost ){
- markTerminal( 0, boost, null );
+ markTerminal( 0, boost );
}
- private void markTerminal( int slop, float boost, int[] positions ){
- if (slop > this.slop || (slop == this.slop && boost > this.boost)) {
- this.terminal = true;
- this.slop = slop;
- this.boost = boost;
- this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
- this.positions = positions;
- if (positions != null) {
- fieldQuery.maxPhraseWindow = Math.max(fieldQuery.maxPhraseWindow, slop + positions[positions.length-1] - positions[0]);
- }
- }
- }
-
- /**
- * The max phrase window based on the actual phrase positions and slop.
- */
- int getMaxPhraseWindow() {
- return fieldQuery.maxPhraseWindow;
+ private void markTerminal( int slop, float boost ){
+ this.terminal = true;
+ this.slop = slop;
+ this.boost = boost;
+ this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
}
public boolean isTerminal(){
@@ -520,20 +435,15 @@ public class FieldQuery {
// if the candidate is a term, it is valid
if( phraseCandidate.size() == 1 ) return true;
-
- assert phraseCandidate.size() == positions.length;
// else check whether the candidate is valid phrase
// compare position-gaps between terms to slop
int pos = phraseCandidate.get( 0 ).getPosition();
- int totalDistance = 0;
for( int i = 1; i < phraseCandidate.size(); i++ ){
int nextPos = phraseCandidate.get( i ).getPosition();
- final int expectedDelta = positions[i] - positions[i - 1];
- final int actualDelta = nextPos - pos;
- totalDistance += Math.abs(expectedDelta - actualDelta);
+ if( Math.abs( nextPos - pos - 1 ) > slop ) return false;
pos = nextPos;
}
- return totalDistance <= slop;
+ return true;
}
}
}
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java?rev=1520536&r1=1520535&r2=1520536&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java Fri Sep 6 10:52:47 2013
@@ -145,13 +145,6 @@ public class FieldTermStack {
}
/**
- * Return the top TermInfo object of the stack without removing it.
- */
- public TermInfo peek() {
- return termList.peek();
- }
-
- /**
* @param termInfo the TermInfo object to be put on the top of the stack
*/
public void push( TermInfo termInfo ){
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1520536&r1=1520535&r2=1520536&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Fri Sep 6 10:52:47 2013
@@ -16,18 +16,10 @@ package org.apache.lucene.search.vectorh
* limitations under the License.
*/
import java.io.IOException;
-import java.io.Reader;
-import java.util.Arrays;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -35,24 +27,20 @@ import org.apache.lucene.document.TextFi
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
public class FastVectorHighlighterTest extends LuceneTestCase {
-
- private static final String FIELD = "text";
+
public void testSimpleHighlightTest() throws IOException {
Directory dir = newDirectory();
@@ -299,171 +287,4 @@ public class FastVectorHighlighterTest e
writer.close();
dir.close();
}
-
- public void testLotsOfPhrases() throws IOException {
- Directory dir = newDirectory();
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
- FieldType type = new FieldType(TextField.TYPE_STORED);
- type.setStoreTermVectorOffsets(true);
- type.setStoreTermVectorPositions(true);
- type.setStoreTermVectors(true);
- type.freeze();
- String[] terms = { "org", "apache", "lucene"};
- int iters = 1000; // don't let it go too big, or jenkins will stack overflow: atLeast(1000);
- StringBuilder builder = new StringBuilder();
- for (int i = 0; i < iters; i++) {
- builder.append(terms[random().nextInt(terms.length)]).append(" ");
- if (random().nextInt(6) == 3) {
- builder.append("solr").append(" ");
- }
- }
- Document doc = new Document();
- Field field = new Field("field", builder.toString(), type);
- doc.add(field);
- writer.addDocument(doc);
- PhraseQuery query = new PhraseQuery();
- query.add(new Term("field", "org"));
- query.add(new Term("field", "apache"));
- query.add(new Term("field", "lucene"));
-
-
- FastVectorHighlighter highlighter = new FastVectorHighlighter();
- IndexReader reader = DirectoryReader.open(writer, true);
- IndexSearcher searcher = newSearcher(reader);
- TopDocs hits = searcher.search(query, 10);
- assertEquals(1, hits.totalHits);
- FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
- String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
- for (int i = 0; i < bestFragments.length; i++) {
- String result = bestFragments[i].replaceAll("<b>org apache lucene</b>", "FOOBAR");
- assertFalse(result.contains("org apache lucene"));
- }
- reader.close();
- writer.close();
- dir.close();
- }
-
- public void testOverlappingPhrases() throws IOException {
- final Analyzer analyzer = new Analyzer() {
-
- @Override
- protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- final Tokenizer source = new MockTokenizer(reader);
- TokenStream sink = source;
- sink = new SynonymFilter(sink);
- return new TokenStreamComponents(source, sink);
- }
-
- };
- final Directory directory = newDirectory();
- RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer);
- Document doc = new Document();
- FieldType withVectors = new FieldType(TextField.TYPE_STORED);
- withVectors.setStoreTermVectors(true);
- withVectors.setStoreTermVectorPositions(true);
- withVectors.setStoreTermVectorOffsets(true);
- doc.add(new Field(FIELD, "a b c", withVectors));
- iw.addDocument(doc);
- DirectoryReader ir = iw.getReader();
-
- // Disjunction of two overlapping phrase queries
- final PhraseQuery pq1 = new PhraseQuery();
- pq1.add(new Term(FIELD, "a"), 0);
- pq1.add(new Term(FIELD, "b"), 1);
- pq1.add(new Term(FIELD, "c"), 2);
-
- final PhraseQuery pq2 = new PhraseQuery();
- pq2.add(new Term(FIELD, "a"), 0);
- pq2.add(new Term(FIELD, "B"), 1);
- pq2.add(new Term(FIELD, "c"), 2);
-
- final BooleanQuery bq = new BooleanQuery();
- bq.add(pq1, Occur.SHOULD);
- bq.add(pq2, Occur.SHOULD);
-
- // Single phrase query with two terms at the same position
- final PhraseQuery pq = new PhraseQuery();
- pq.add(new Term(FIELD, "a"), 0);
- pq.add(new Term(FIELD, "b"), 1);
- pq.add(new Term(FIELD, "B"), 1);
- pq.add(new Term(FIELD, "c"), 2);
-
- for (Query query : Arrays.asList(pq1, pq2, bq, pq)) {
- assertEquals(1, new IndexSearcher(ir).search(bq, 1).totalHits);
-
- FastVectorHighlighter highlighter = new FastVectorHighlighter();
- FieldQuery fieldQuery = highlighter.getFieldQuery(query, ir);
- String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1);
- assertEquals("<b>a b c</b>", bestFragments[0]);
- }
-
- ir.close();
- iw.close();
- directory.close();
- }
-
- public void testPhraseWithGap() throws IOException {
- final Directory directory = newDirectory();
- RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
- Document doc = new Document();
- FieldType withVectors = new FieldType(TextField.TYPE_STORED);
- withVectors.setStoreTermVectors(true);
- withVectors.setStoreTermVectorPositions(true);
- withVectors.setStoreTermVectorOffsets(true);
- doc.add(new Field(FIELD, "a b c", withVectors));
- iw.addDocument(doc);
- DirectoryReader ir = iw.getReader();
-
- final PhraseQuery pq = new PhraseQuery();
- pq.add(new Term(FIELD, "c"), 2);
- pq.add(new Term(FIELD, "a"), 0);
-
- assertEquals(1, new IndexSearcher(ir).search(pq, 1).totalHits);
-
- FastVectorHighlighter highlighter = new FastVectorHighlighter();
- FieldQuery fieldQuery = highlighter.getFieldQuery(pq, ir);
- String[] bestFragments = highlighter.getBestFragments(fieldQuery, ir, 0, FIELD, 1000, 1);
- assertEquals("<b>a</b> b <b>c</b>", bestFragments[0]);
-
- ir.close();
- iw.close();
- directory.close();
- }
-
- // Simple token filter that adds 'B' as a synonym of 'b'
- private static class SynonymFilter extends TokenFilter {
-
- final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-
- State pending;
-
- protected SynonymFilter(TokenStream input) {
- super(input);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (pending != null) {
- restoreState(pending);
- termAtt.setEmpty().append('B');
- posIncAtt.setPositionIncrement(0);
- pending = null;
- return true;
- }
- if (!input.incrementToken()) {
- return false;
- }
- if (termAtt.toString().equals("b")) {
- pending = captureState();
- }
- return true;
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- pending = null;
- }
- }
}
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java?rev=1520536&r1=1520535&r2=1520536&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java Fri Sep 6 10:52:47 2013
@@ -120,31 +120,7 @@ public class FieldPhraseListTest extends
assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() );
}
-
- public void testProximityPhraseReverse() throws Exception {
- make1d1fIndex( "z a a b c" );
-
- FieldQuery fq = new FieldQuery( pqF( 2F, 3, "c", "a" ), true, true );
- FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
- FieldPhraseList fpl = new FieldPhraseList( stack, fq );
- assertEquals( 1, fpl.phraseList.size() );
- assertEquals( "ac(2.0)((4,5)(8,9))", fpl.phraseList.get( 0 ).toString() );
- assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
- assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() );
- }
-
- public void testProximityPhraseWithRepeatedTerms() throws Exception {
- make1d1fIndex( "z a a b b z d" );
-
- FieldQuery fq = new FieldQuery( pqF( 2F, 2, "a", "b", "d" ), true, true );
- FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
- FieldPhraseList fpl = new FieldPhraseList( stack, fq );
- assertEquals( 1, fpl.phraseList.size() );
- assertEquals( "abd(2.0)((4,7)(12,13))", fpl.phraseList.get( 0 ).toString() );
- assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
- assertEquals( 13, fpl.phraseList.get( 0 ).getEndOffset() );
- }
-
+
public void test2PhrasesOverlap() throws Exception {
make1d1fIndex( "d a b c d" );
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1520536&r1=1520535&r2=1520536&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Fri Sep 6 10:52:47 2013
@@ -863,8 +863,8 @@ public class FieldQueryTest extends Abst
phraseCandidate.add( new TermInfo( "c", 4, 5, 4, 1 ) );
assertNull( fq.searchPhrase( F, phraseCandidate ) );
- // "a b c"~2
- query = pqF( 1F, 2, "a", "b", "c" );
+ // "a b c"~1
+ query = pqF( 1F, 1, "a", "b", "c" );
// phraseHighlight = true, fieldMatch = true
fq = new FieldQuery( query, true, true );