You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2013/04/05 18:49:48 UTC

svn commit: r1465041 - in /lucene/dev/branches/branch_4x: ./ dev-tools/ lucene/ lucene/analysis/ lucene/analysis/icu/src/java/org/apache/lucene/collation/ lucene/backwards/ lucene/benchmark/ lucene/classification/ lucene/classification/src/ lucene/code...

Author: simonw
Date: Fri Apr  5 16:49:46 2013
New Revision: 1465041

URL: http://svn.apache.org/r1465041
Log:
LUCENE-4899: FastVectorHighlighter failed with StringIndexOutOfBoundsException if a single highlight phrase or term was greater than the fragCharSize, producing negative string offsets

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/BUILD.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/JRE_VERSION_MIGRATION.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/MIGRATE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/README.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/SYSTEM_REQUIREMENTS.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/backwards/   (props changed)
    lucene/dev/branches/branch_4x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_4x/lucene/build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/classification/   (props changed)
    lucene/dev/branches/branch_4x/lucene/classification/build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/classification/ivy.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/classification/src/   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_4x/lucene/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSort.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSortDocValues.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTotalHitCountCollector.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/demo/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/grouping/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
    lucene/dev/branches/branch_4x/lucene/ivy-settings.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/join/   (props changed)
    lucene/dev/branches/branch_4x/lucene/licenses/   (props changed)
    lucene/dev/branches/branch_4x/lucene/memory/   (props changed)
    lucene/dev/branches/branch_4x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_4x/lucene/module-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/site/   (props changed)
    lucene/dev/branches/branch_4x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/README.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/SYSTEM_REQUIREMENTS.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/cloud-dev/   (props changed)
    lucene/dev/branches/branch_4x/solr/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/core/TestConfig.java   (props changed)
    lucene/dev/branches/branch_4x/solr/example/   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpclient-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpclient-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpcore-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpcore-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpmime-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpmime-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/scripts/   (props changed)
    lucene/dev/branches/branch_4x/solr/site/   (props changed)
    lucene/dev/branches/branch_4x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_4x/solr/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/solr/webapp/   (props changed)

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1465041&r1=1465040&r2=1465041&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Fri Apr  5 16:49:46 2013
@@ -188,6 +188,10 @@ Bug Fixes
   with target<=current (in this case the behavior of advance is undefined).
   (Adrien Grand)
 
+* LUCENE-4899: FastVectorHighlighter failed with StringIndexOutOfBoundsException
+  if a single highlight phrase or term was longer than the fragCharSize, producing
+  negative string offsets. (Simon Willnauer)
+
 Documentation
 
 * LUCENE-4841: Added example SimpleSortedSetFacetsExample to show how

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java?rev=1465041&r1=1465040&r2=1465041&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java Fri Apr  5 16:49:46 2013
@@ -46,63 +46,98 @@ public abstract class BaseFragListBuilde
     this( MARGIN_DEFAULT );
   }
   
-  protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){
-    
+ protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){
     if( fragCharSize < minFragCharSize )
       throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher." );
     
     List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
-    Iterator<WeightedPhraseInfo> ite = fieldPhraseList.getPhraseList().iterator();
+    IteratorQueue<WeightedPhraseInfo> queue = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.getPhraseList().iterator());
     WeightedPhraseInfo phraseInfo = null;
     int startOffset = 0;
-    boolean taken = false;
-    while( true ){
-      if( !taken ){
-        if( !ite.hasNext() ) break;
-        phraseInfo = ite.next();
-      }
-      taken = false;
-      if( phraseInfo == null ) break;
-
+    while((phraseInfo = queue.top()) != null){
       // if the phrase violates the border of previous fragment, discard it and try next phrase
-      if( phraseInfo.getStartOffset() < startOffset ) continue;
-
+      if( phraseInfo.getStartOffset() < startOffset )  {
+        queue.removeTop();
+        continue;
+      }
+      
       wpil.clear();
-      wpil.add( phraseInfo );
-      int firstOffset = phraseInfo.getStartOffset();
-      int st = phraseInfo.getStartOffset() - margin < startOffset ?
-          startOffset : phraseInfo.getStartOffset() - margin;
-      int en = st + fragCharSize;
-      if( phraseInfo.getEndOffset() > en )
-        en = phraseInfo.getEndOffset();
-
-      int lastEndOffset = phraseInfo.getEndOffset();
-      while( true ){
-        if( ite.hasNext() ){
-          phraseInfo = ite.next();
-          taken = true;
-          if( phraseInfo == null ) break;
-        }
-        else
+      final int currentPhraseStartOffset = phraseInfo.getStartOffset();
+      int currentPhraseEndOffset = phraseInfo.getEndOffset();
+      int spanStart = Math.max(currentPhraseStartOffset - margin, startOffset);
+      int spanEnd = Math.max(currentPhraseEndOffset, spanStart + fragCharSize);
+      if (acceptPhrase(queue.removeTop(),  currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize)) {
+        wpil.add(phraseInfo);
+      }
+      while((phraseInfo = queue.top()) != null) { // pull until we crossed the current spanEnd
+        if (phraseInfo.getEndOffset() <= spanEnd) {
+          currentPhraseEndOffset = phraseInfo.getEndOffset();
+          if (acceptPhrase(queue.removeTop(),  currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize)) {
+            wpil.add(phraseInfo);
+          }
+        } else {
           break;
-        if( phraseInfo.getEndOffset() <= en ){
-          wpil.add( phraseInfo );
-          lastEndOffset = phraseInfo.getEndOffset();
         }
-        else
-          break;
       }
-      int matchLen = lastEndOffset - firstOffset;
-      //now recalculate the start and end position to "center" the result
-      int newMargin = (fragCharSize-matchLen)/2;
-      st = firstOffset - newMargin;
-      if(st<startOffset){
-        st = startOffset;
+      if (wpil.isEmpty()) {
+        continue;
+      }
+      
+      final int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;
+      // now recalculate the start and end position to "center" the result
+      final int newMargin = Math.max(0, (fragCharSize-matchLen)/2); // matchLen can be > fragCharSize prevent IAOOB here
+      spanStart = currentPhraseStartOffset - newMargin;
+      if (spanStart < startOffset) {
+        spanStart = startOffset;
       }
-      en = st+fragCharSize;
-      startOffset = en;
-      fieldFragList.add( st, en, wpil );
+      // whatever is bigger here we grow this out
+      spanEnd = spanStart + Math.max(matchLen, fragCharSize);  
+      startOffset = spanEnd;
+      fieldFragList.add(spanStart, spanEnd, wpil);
     }
     return fieldFragList;
   }
+ 
+  /**
+   * A predicate to decide if the given {@link WeightedPhraseInfo} should be
+   * accepted as a highlighted phrase or if it should be discarded.
+   * <p>
+   * The default implementation discards phrases that are composed of more than one term
+   * and where the matchLength exceeds the fragment character size.
+   * 
+   * @param info the phrase info to accept
+   * @param matchLength the match length of the current phrase
+   * @param fragCharSize the configured fragment character size
+   * @return <code>true</code> if this phrase info should be accepted as a highlight phrase
+   */
+ protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
+   return info.getTermsOffsets().size() <= 1 ||  matchLength <= fragCharSize;
+ }
+ 
+ private static final class IteratorQueue<T> {
+   private final Iterator<T> iter;
+   private T top;
+   
+   public IteratorQueue(Iterator<T> iter) {
+     this.iter = iter;
+     T removeTop = removeTop();
+     assert removeTop == null;
+   }
+   
+   public T top() {
+     return top;
+   }
+   
+   public T removeTop() {
+     T currentTop = top;
+     if (iter.hasNext()) {
+       top = iter.next();
+     } else {
+       top = null;
+     }
+     return currentTop;
+   }
+   
+ }
+ 
 }

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=1465041&r1=1465040&r2=1465041&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Fri Apr  5 16:49:46 2013
@@ -81,7 +81,7 @@ public class FieldPhraseList {
         if( ti != null )
           nextMap = currMap.getTermMap( ti.getText() );
         if( ti == null || nextMap == null ){
-          if( ti != null )
+          if( ti != null ) 
             fieldTermStack.push( ti );
           if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
             addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1465041&r1=1465040&r2=1465041&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Fri Apr  5 16:49:46 2013
@@ -29,18 +29,19 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
-import org.apache.lucene.search.highlight.TokenSources;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
 
 public class FastVectorHighlighterTest extends LuceneTestCase {
   
+  
   public void testSimpleHighlightTest() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
@@ -71,6 +72,179 @@ public class FastVectorHighlighterTest e
     dir.close();
   }
   
+  public void testPhraseHighlightLongTextTest() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    Document doc = new Document();
+    FieldType type = new FieldType(TextField.TYPE_STORED);
+    type.setStoreTermVectorOffsets(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectors(true);
+    type.freeze();
+    Field text = new Field("text", 
+        "Netscape was the general name for a series of web browsers originally produced by Netscape Communications Corporation, now a subsidiary of AOL The original browser was once the dominant browser in terms of usage share, but as a result of the first browser war it lost virtually all of its share to Internet Explorer Netscape was discontinued and support for all Netscape browsers and client products was terminated on March 1, 2008 Netscape Navigator was the name of Netscape\u0027s web browser from versions 1.0 through 4.8 The first beta release versions of the browser were released in 1994 and known as Mosaic and then Mosaic Netscape until a legal challenge from the National Center for Supercomputing Applications (makers of NCSA Mosaic, which many of Netscape\u0027s founders used to develop), led to the name change to Netscape Navigator The company\u0027s name also changed from Mosaic Communications Corporation to Netscape Communications Corporation The browser was eas
 ily the most advanced...", type);
+    doc.add(text);
+    writer.addDocument(doc);
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    int docId = 0;
+    String field = "text";
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field, "internet")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "explorer")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 128, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("first browser war it lost virtually all of its share to <b>Internet</b> <b>Explorer</b> Netscape was discontinued and support for all Netscape browsers", bestFragments[0]);
+    }
+    
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "internet"));
+      query.add(new Term(field, "explorer"));
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 128, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("first browser war it lost virtually all of its share to <b>Internet Explorer</b> Netscape was discontinued and support for all Netscape browsers", bestFragments[0]);
+    }
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+  
+  // see LUCENE-4899
+  public void testPhraseHighlightTest() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    Document doc = new Document();
+    FieldType type = new FieldType(TextField.TYPE_STORED);
+    type.setStoreTermVectorOffsets(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectors(true);
+    type.freeze();
+    Field longTermField = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type);
+    Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);
+
+    doc.add(longTermField);
+    doc.add(noLongTermField);
+    writer.addDocument(doc);
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    int docId = 0;
+    String field = "no_long_term";
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field, "test")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("<b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+    }
+    {
+      BooleanQuery query = new BooleanQuery();
+      PhraseQuery pq = new PhraseQuery();
+      pq.add(new Term(field, "test"));
+      pq.add(new Term(field, "foo"));
+      pq.add(new Term(field, "highlighed"));
+      pq.setSlop(5);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(pq, Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(0, bestFragments.length);
+      bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 30, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+      
+    }
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "test"));
+      query.add(new Term(field, "foo"));
+      query.add(new Term(field, "highlighed"));
+      query.setSlop(3);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(0, bestFragments.length);
+      bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 30, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+      
+    }
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "test"));
+      query.add(new Term(field, "foo"));
+      query.add(new Term(field, "highlighted"));
+      query.setSlop(30);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      assertEquals(0, bestFragments.length);
+    }
+    {
+      BooleanQuery query = new BooleanQuery();
+      PhraseQuery pq = new PhraseQuery();
+      pq.add(new Term(field, "test"));
+      pq.add(new Term(field, "foo"));
+      pq.add(new Term(field, "highlighed"));
+      pq.setSlop(5);
+      BooleanQuery inner = new BooleanQuery();
+      inner.add(pq, Occur.MUST);
+      inner.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(inner, Occur.MUST);
+      query.add(pq, Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      assertEquals(0, bestFragments.length);
+      
+      bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 30, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+    }
+    
+    field = "long_term";
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field,
+          "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("<b>thisisaverylongwordandmakessurethisfails</b>",
+          bestFragments[0]);
+    }
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+  
   public void testCommonTermsQueryHighlightTest() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,  new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java?rev=1465041&r1=1465040&r2=1465041&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java Fri Apr  5 16:49:46 2013
@@ -42,7 +42,7 @@ public class SimpleFragListBuilderTest e
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
     FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), sflb.minFragCharSize );
     assertEquals( 1, ffl.getFragInfos().size() );
-    assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,18)", ffl.getFragInfos().get( 0 ).toString() );
+    assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
   }
   
   public void testSmallerFragSizeThanPhraseQuery() throws Exception {
@@ -55,7 +55,7 @@ public class SimpleFragListBuilderTest e
     FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh   jklmnopqrs" ), sflb.minFragCharSize );
     assertEquals( 1, ffl.getFragInfos().size() );
     if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
-    assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(1,19)", ffl.getFragInfos().get( 0 ).toString() );
+    assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
   }
   
   public void test1TermIndex() throws Exception {