Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/07 13:21:14 UTC

svn commit: r1479862 [21/38] - in /lucene/dev/branches/lucene4258: ./ dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/maven/ dev-tools/maven/solr/ dev-tools/maven/solr/core/src/java/ dev-tools/maven/solr/solrj/src/java/ dev-t...

Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Tue May  7 11:20:55 2013
@@ -33,8 +33,8 @@ import java.util.TreeSet;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.index.MultiReader;
@@ -48,6 +48,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.lucene.util.UnicodeUtil;
 
 /**
@@ -96,9 +97,14 @@ public class PostingsHighlighter {
   public static final int DEFAULT_MAX_LENGTH = 10000;
     
   private final int maxLength;
-  private final BreakIterator breakIterator;
-  private final PassageScorer scorer;
-  private final PassageFormatter formatter;
+
+  /** Set the first time {@link #getFormatter} is called,
+   *  and then reused. */
+  private PassageFormatter defaultFormatter;
+
+  /** Set the first time {@link #getScorer} is called,
+   *  and then reused. */
+  private PassageScorer defaultScorer;
   
   /**
    * Creates a new highlighter with default parameters.
@@ -113,35 +119,42 @@ public class PostingsHighlighter {
    * @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
    */
   public PostingsHighlighter(int maxLength) {
-    this(maxLength, BreakIterator.getSentenceInstance(Locale.ROOT), new PassageScorer(), new PassageFormatter());
-  }
-  
-  /**
-   * Creates a new highlighter with custom parameters.
-   * @param maxLength maximum content size to process.
-   * @param breakIterator used for finding passage
-   *        boundaries; pass null to highlight the entire
-   *        content as a single Passage.
-   * @param scorer used for ranking passages.
-   * @param formatter used for formatting passages into highlighted snippets.
-   * @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
-   */
-  public PostingsHighlighter(int maxLength, BreakIterator breakIterator, PassageScorer scorer, PassageFormatter formatter) {
     if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
       // two reasons: no overflow problems in BreakIterator.preceding(offset+1),
       // our sentinel in the offsets queue uses this value to terminate.
       throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
     }
-    if (breakIterator == null) {
-      breakIterator = new WholeBreakIterator();
-    }
-    if (scorer == null || formatter == null) {
-      throw new NullPointerException();
-    }
     this.maxLength = maxLength;
-    this.breakIterator = breakIterator;
-    this.scorer = scorer;
-    this.formatter = formatter;
+  }
+  
+  /** Returns the {@link BreakIterator} to use for
+   *  dividing text into passages.  This returns
+   *  {@link BreakIterator#getSentenceInstance(Locale)} for
+   *  {@link Locale#ROOT} by default; subclasses can override to customize. */
+  protected BreakIterator getBreakIterator(String field) {
+    return BreakIterator.getSentenceInstance(Locale.ROOT);
+  }
+
+  /** Returns the {@link PassageFormatter} to use for
+   *  formatting passages into highlighted snippets.  By default this
+   *  returns a single {@link DefaultPassageFormatter}, created lazily
+   *  and then reused; subclasses can override to customize. */
+  protected PassageFormatter getFormatter(String field) {
+    if (defaultFormatter == null) {
+      defaultFormatter = new DefaultPassageFormatter();
+    }
+    return defaultFormatter;
+  }
+
+  /** Returns the {@link PassageScorer} to use for
+   *  ranking passages.  By default this returns a single
+   *  {@link PassageScorer}, created lazily and then reused;
+   *  subclasses can override to customize. */
+  protected PassageScorer getScorer(String field) {
+    if (defaultScorer == null) {
+      defaultScorer = new PassageScorer();
+    }
+    return defaultScorer;
   }
 
   /**
@@ -182,7 +195,7 @@ public class PostingsHighlighter {
    *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
    */
   public String[] highlight(String field, Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException {
-    Map<String,String[]> res = highlightFields(new String[] { field }, query, searcher, topDocs, maxPassages);
+    Map<String,String[]> res = highlightFields(new String[] { field }, query, searcher, topDocs, new int[] { maxPassages });
     return res.get(field);
   }
   
@@ -212,7 +225,9 @@ public class PostingsHighlighter {
    *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
    */
   public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException {
-    return highlightFields(fields, query, searcher, topDocs, 1);
+    int maxPassages[] = new int[fields.length];
+    Arrays.fill(maxPassages, 1);
+    return highlightFields(fields, query, searcher, topDocs, maxPassages);
   }
   
   /**
@@ -243,7 +258,7 @@ public class PostingsHighlighter {
    * @throws IllegalArgumentException if <code>field</code> was indexed without 
    *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
    */
-  public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException {
+  public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages[]) throws IOException {
     final ScoreDoc scoreDocs[] = topDocs.scoreDocs;
     int docids[] = new int[scoreDocs.length];
     for (int i = 0; i < docids.length; i++) {
@@ -257,12 +272,12 @@ public class PostingsHighlighter {
    * Highlights the top-N passages from multiple fields,
    * for the provided int[] docids.
    * 
-   * @param fields field names to highlight. 
+   * @param fieldsIn field names to highlight. 
    *        Must have a stored string value and also be indexed with offsets.
    * @param query query to highlight.
    * @param searcher searcher that was previously used to execute the query.
-   * @param docids containing the document IDs to highlight.
-   * @param maxPassages The maximum number of top-N ranked passages per-field used to 
+   * @param docidsIn the document IDs to highlight.
+   * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to 
    *        form the highlighted snippets.
    * @return Map keyed on field name, containing the array of formatted snippets 
    *         corresponding to the documents in <code>topDocs</code>. 
@@ -273,7 +288,13 @@ public class PostingsHighlighter {
    * @throws IllegalArgumentException if <code>field</code> was indexed without 
    *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
    */
-  public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, int[] docids, int maxPassages) throws IOException {
+  public Map<String,String[]> highlightFields(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
+    if (fieldsIn.length < 1) {
+      throw new IllegalArgumentException("fieldsIn must not be empty");
+    }
+    if (fieldsIn.length != maxPassagesIn.length) {
+      throw new IllegalArgumentException("invalid number of maxPassagesIn");
+    }
     final IndexReader reader = searcher.getIndexReader();
     query = rewrite(query);
     SortedSet<Term> queryTerms = new TreeSet<Term>();
@@ -282,11 +303,34 @@ public class PostingsHighlighter {
     IndexReaderContext readerContext = reader.getContext();
     List<AtomicReaderContext> leaves = readerContext.leaves();
 
-    BreakIterator bi = (BreakIterator)breakIterator.clone();
+    // Make our own copies because we sort in-place:
+    int[] docids = new int[docidsIn.length];
+    System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
+    final String fields[] = new String[fieldsIn.length];
+    System.arraycopy(fieldsIn, 0, fields, 0, fieldsIn.length);
+    final int maxPassages[] = new int[maxPassagesIn.length];
+    System.arraycopy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.length);
 
     // sort for sequential io
     Arrays.sort(docids);
-    Arrays.sort(fields);
+    new InPlaceMergeSorter() {
+
+      @Override
+      protected void swap(int i, int j) {
+        String tmp = fields[i];
+        fields[i] = fields[j];
+        fields[j] = tmp;
+        int tmp2 = maxPassages[i];
+        maxPassages[i] = maxPassages[j];
+        maxPassages[j] = tmp2;
+      }
+
+      @Override
+      protected int compare(int i, int j) {
+        return fields[i].compareTo(fields[j]);
+      }
+      
+    }.sort(0, fields.length);
     
     // pull stored data:
     String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
@@ -294,16 +338,23 @@ public class PostingsHighlighter {
     Map<String,String[]> highlights = new HashMap<String,String[]>();
     for (int i = 0; i < fields.length; i++) {
       String field = fields[i];
+      int numPassages = maxPassages[i];
       Term floor = new Term(field, "");
       Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
       SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
       // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)
-      Term terms[] = fieldTerms.toArray(new Term[fieldTerms.size()]);
-      Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
+
+      // Strip the redundant field name off each term:
+      BytesRef terms[] = new BytesRef[fieldTerms.size()];
+      int termUpto = 0;
+      for(Term term : fieldTerms) {
+        terms[termUpto++] = term.bytes();
+      }
+      Map<Integer,String> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages);
         
       String[] result = new String[docids.length];
-      for (int j = 0; j < docids.length; j++) {
-        result[j] = fieldHighlights.get(docids[j]);
+      for (int j = 0; j < docidsIn.length; j++) {
+        result[j] = fieldHighlights.get(docidsIn[j]);
       }
       highlights.put(field, result);
     }
@@ -329,7 +380,7 @@ public class PostingsHighlighter {
     return contents;
   }
     
-  private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {  
+  private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {  
     Map<Integer,String> highlights = new HashMap<Integer,String>();
     
     // reuse in the real sense... for docs in same segment we just advance our old enum
@@ -337,6 +388,11 @@ public class PostingsHighlighter {
     TermsEnum termsEnum = null;
     int lastLeaf = -1;
 
+    PassageFormatter fieldFormatter = getFormatter(field);
+    if (fieldFormatter == null) {
+      throw new NullPointerException("PassageFormatter cannot be null");
+    }
+
     for (int i = 0; i < docids.length; i++) {
       String content = contents[i];
       if (content.length() == 0) {
@@ -362,7 +418,7 @@ public class PostingsHighlighter {
       if (passages.length > 0) {
         // otherwise a null snippet (eg if field is missing
         // entirely from the doc)
-        highlights.put(doc, formatter.format(passages, content));
+        highlights.put(doc, fieldFormatter.format(passages, content));
       }
       lastLeaf = leaf;
     }
@@ -373,8 +429,12 @@ public class PostingsHighlighter {
   // algorithm: treat sentence snippets as miniature documents
  // we can intersect these with the postings lists via BreakIterator.preceding(offset)
   // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
-  private Passage[] highlightDoc(String field, Term terms[], int contentLength, BreakIterator bi, int doc, 
+  private Passage[] highlightDoc(String field, BytesRef terms[], int contentLength, BreakIterator bi, int doc, 
       TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n) throws IOException {
+    PassageScorer scorer = getScorer(field);
+    if (scorer == null) {
+      throw new NullPointerException("PassageScorer cannot be null");
+    }
     PriorityQueue<OffsetsEnum> pq = new PriorityQueue<OffsetsEnum>();
     float weights[] = new float[terms.length];
     // initialize postings
@@ -385,7 +445,7 @@ public class PostingsHighlighter {
         continue;
       } else if (de == null) {
         postings[i] = EMPTY; // initially
-        if (!termsEnum.seekExact(terms[i].bytes(), true)) {
+        if (!termsEnum.seekExact(terms[i], true)) {
           continue; // term not found
         }
         de = postings[i] = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS);

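For context on the API change above: the four-argument constructor is gone, and
customization now happens by overriding the protected getBreakIterator /
getScorer / getFormatter hooks; maxPassages is likewise per-field now. A minimal
sketch (assuming the usual org.apache.lucene.search.postingshighlight imports;
the field names and query are illustrative only):

    // Sketch only: overrides the new protected hooks added above.
    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
      @Override
      protected BreakIterator getBreakIterator(String field) {
        // one passage spanning the whole content, as passing a null
        // BreakIterator to the removed constructor used to do
        return new WholeBreakIterator();
      }

      @Override
      protected PassageFormatter getFormatter(String field) {
        return new DefaultPassageFormatter();
      }
    };
    // maxPassages is now an int[], parallel to the fields array:
    Map<String,String[]> snippets = highlighter.highlightFields(
        new String[] { "title", "body" }, query, searcher, topDocs,
        new int[] { 1, 3 });
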
Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java Tue May  7 11:20:55 2013
@@ -20,11 +20,11 @@ package org.apache.lucene.search.posting
 import java.text.BreakIterator;
 import java.text.CharacterIterator;
 
-/** Just produces one single fragment for the entire
- *  string. */
-final class WholeBreakIterator extends BreakIterator {
+/** Produces a single fragment spanning the entire text. */
+public final class WholeBreakIterator extends BreakIterator {
   private CharacterIterator text;
-  private int len;
+  private int start;
+  private int end;
   private int current;
 
   @Override
@@ -34,14 +34,17 @@ final class WholeBreakIterator extends B
 
   @Override
   public int first() {
-    return (current = 0);
+    return (current = start);
   }
 
   @Override
   public int following(int pos) {
-    if (pos < 0 || pos > len) {
+    if (pos < start || pos > end) {
       throw new IllegalArgumentException("offset out of bounds");
-    } else if (pos == len) {
+    } else if (pos == end) {
+      // this conflicts with the javadocs, but matches the actual behavior (a known JDK bug):
+      // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
+      current = end;
       return DONE;
     } else {
       return last();
@@ -55,12 +58,12 @@ final class WholeBreakIterator extends B
 
   @Override
   public int last() {
-    return (current = len);
+    return (current = end);
   }
 
   @Override
   public int next() {
-    if (current == len) {
+    if (current == end) {
       return DONE;
     } else {
       return last();
@@ -83,9 +86,12 @@ final class WholeBreakIterator extends B
 
   @Override
   public int preceding(int pos) {
-    if (pos < 0 || pos > len) {
+    if (pos < start || pos > end) {
       throw new IllegalArgumentException("offset out of bounds");
-    } else if (pos == 0) {
+    } else if (pos == start) {
+      // this conflicts with the javadocs, but matches the actual behavior (a known JDK bug):
+      // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
+      current = start;
       return DONE;
     } else {
       return first();
@@ -94,7 +100,7 @@ final class WholeBreakIterator extends B
 
   @Override
   public int previous() {
-    if (current == 0) {
+    if (current == start) {
       return DONE;
     } else {
       return first();
@@ -103,11 +109,9 @@ final class WholeBreakIterator extends B
 
   @Override
   public void setText(CharacterIterator newText) {
-    if (newText.getBeginIndex() != 0) {
-      throw new UnsupportedOperationException();
-    }
-    len = newText.getEndIndex();
+    start = newText.getBeginIndex();
+    end = newText.getEndIndex();
     text = newText;
-    current = 0;
+    current = start;
   }
 }

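The start/end change above matters when the CharacterIterator does not begin at
offset 0; setText used to throw UnsupportedOperationException in that case. A
small sketch of the new semantics (assuming java.text imports; the string and
indices are arbitrary):

    BreakIterator bi = new WholeBreakIterator();
    // sub-range [5, 17) of a larger string; begin index is 5, not 0
    bi.setText(new StringCharacterIterator("some prefix and more text", 5, 17, 5));
    assert bi.first() == 5;                         // start of the range
    assert bi.last() == 17;                         // end of the range
    assert bi.following(17) == BreakIterator.DONE;  // pos == end
    assert bi.preceding(5) == BreakIterator.DONE;   // pos == start
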
Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java Tue May  7 11:20:55 2013
@@ -46,63 +46,98 @@ public abstract class BaseFragListBuilde
     this( MARGIN_DEFAULT );
   }
   
-  protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){
-    
+  protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){
     if( fragCharSize < minFragCharSize )
       throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher." );
     
     List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
-    Iterator<WeightedPhraseInfo> ite = fieldPhraseList.getPhraseList().iterator();
+    IteratorQueue<WeightedPhraseInfo> queue = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.getPhraseList().iterator());
     WeightedPhraseInfo phraseInfo = null;
     int startOffset = 0;
-    boolean taken = false;
-    while( true ){
-      if( !taken ){
-        if( !ite.hasNext() ) break;
-        phraseInfo = ite.next();
-      }
-      taken = false;
-      if( phraseInfo == null ) break;
-
+    while((phraseInfo = queue.top()) != null){
       // if the phrase violates the border of previous fragment, discard it and try next phrase
-      if( phraseInfo.getStartOffset() < startOffset ) continue;
-
+      if( phraseInfo.getStartOffset() < startOffset )  {
+        queue.removeTop();
+        continue;
+      }
+      
       wpil.clear();
-      wpil.add( phraseInfo );
-      int firstOffset = phraseInfo.getStartOffset();
-      int st = phraseInfo.getStartOffset() - margin < startOffset ?
-          startOffset : phraseInfo.getStartOffset() - margin;
-      int en = st + fragCharSize;
-      if( phraseInfo.getEndOffset() > en )
-        en = phraseInfo.getEndOffset();
-
-      int lastEndOffset = phraseInfo.getEndOffset();
-      while( true ){
-        if( ite.hasNext() ){
-          phraseInfo = ite.next();
-          taken = true;
-          if( phraseInfo == null ) break;
-        }
-        else
+      final int currentPhraseStartOffset = phraseInfo.getStartOffset();
+      int currentPhraseEndOffset = phraseInfo.getEndOffset();
+      int spanStart = Math.max(currentPhraseStartOffset - margin, startOffset);
+      int spanEnd = Math.max(currentPhraseEndOffset, spanStart + fragCharSize);
+      if (acceptPhrase(queue.removeTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize)) {
+        wpil.add(phraseInfo);
+      }
+      while((phraseInfo = queue.top()) != null) { // pull until we cross the current spanEnd
+        if (phraseInfo.getEndOffset() <= spanEnd) {
+          currentPhraseEndOffset = phraseInfo.getEndOffset();
+          if (acceptPhrase(queue.removeTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize)) {
+            wpil.add(phraseInfo);
+          }
+        } else {
           break;
-        if( phraseInfo.getEndOffset() <= en ){
-          wpil.add( phraseInfo );
-          lastEndOffset = phraseInfo.getEndOffset();
         }
-        else
-          break;
       }
-      int matchLen = lastEndOffset - firstOffset;
-      //now recalculate the start and end position to "center" the result
-      int newMargin = (fragCharSize-matchLen)/2;
-      st = firstOffset - newMargin;
-      if(st<startOffset){
-        st = startOffset;
+      if (wpil.isEmpty()) {
+        continue;
+      }
+      
+      final int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;
+      // now recalculate the start and end position to "center" the result
+      final int newMargin = Math.max(0, (fragCharSize-matchLen)/2); // matchLen can be > fragCharSize; clamp to avoid a negative margin
+      spanStart = currentPhraseStartOffset - newMargin;
+      if (spanStart < startOffset) {
+        spanStart = startOffset;
       }
-      en = st+fragCharSize;
-      startOffset = en;
-      fieldFragList.add( st, en, wpil );
+      // grow the span to whichever is bigger: the match length or the fragment size
+      spanEnd = spanStart + Math.max(matchLen, fragCharSize);
+      startOffset = spanEnd;
+      fieldFragList.add(spanStart, spanEnd, wpil);
     }
     return fieldFragList;
   }
+
+  /**
+   * A predicate to decide if the given {@link WeightedPhraseInfo} should be
+   * accepted as a highlighted phrase or if it should be discarded.
+   * <p>
+   * The default implementation discards phrases that are composed of more than one term
+   * and whose match length exceeds the fragment character size.
+   * 
+   * @param info the phrase info to evaluate
+   * @param matchLength the match length of the current phrase
+   * @param fragCharSize the configured fragment character size
+   * @return <code>true</code> if this phrase info should be accepted as a highlight phrase
+   */
+  protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
+    return info.getTermsOffsets().size() <= 1 || matchLength <= fragCharSize;
+  }
+
+  private static final class IteratorQueue<T> {
+    private final Iterator<T> iter;
+    private T top;
+
+    public IteratorQueue(Iterator<T> iter) {
+      this.iter = iter;
+      T removeTop = removeTop();
+      assert removeTop == null;
+    }
+
+    public T top() {
+      return top;
+    }
+
+    public T removeTop() {
+      T currentTop = top;
+      if (iter.hasNext()) {
+        top = iter.next();
+      } else {
+        top = null;
+      }
+      return currentTop;
+    }
+  }
+
 }

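The new acceptPhrase hook above is the extension point for the discard
behavior. A sketch, assuming SimpleFragListBuilder (a concrete subclass of
BaseFragListBuilder) and the usual vectorhighlight imports; returning true
unconditionally restores the old behavior of keeping oversized multi-term
matches:

    FragListBuilder fragListBuilder = new SimpleFragListBuilder() {
      @Override
      protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
        // keep every phrase, even multi-term matches longer than fragCharSize
        return true;
      }
    };
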
Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Tue May  7 11:20:55 2013
@@ -81,7 +81,7 @@ public class FieldPhraseList {
         if( ti != null )
           nextMap = currMap.getTermMap( ti.getText() );
         if( ti == null || nextMap == null ){
-          if( ti != null )
+          if( ti != null ) 
             fieldTermStack.push( ti );
           if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
             addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );

Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Tue May  7 11:20:55 2013
@@ -91,7 +91,7 @@ public class HighlighterTest extends Bas
     phraseQuery.add(new Term(FIELD_NAME, "long"));
 
     query = phraseQuery;
-    searcher = new IndexSearcher(reader);
+    searcher = newSearcher(reader);
     TopDocs hits = searcher.search(query, 10);
     
     QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
@@ -122,7 +122,7 @@ public class HighlighterTest extends Bas
     query.add(new Term(FIELD_NAME, "long"));
     query.add(new Term(FIELD_NAME, "very"));
 
-    searcher = new IndexSearcher(reader);
+    searcher = newSearcher(reader);
     TopDocs hits = searcher.search(query, 10);
     assertEquals(2, hits.totalHits);
     QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
@@ -164,12 +164,21 @@ public class HighlighterTest extends Bas
       public String toString(String field) {
         return null;
       }
-      
+
+      @Override
+      public int hashCode() {
+        return 31 * super.hashCode();
+      }
+
+      @Override
+      public boolean equals(Object obj) {
+        return super.equals(obj);
+      }
     };
     
     Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     
-    searcher = new IndexSearcher(reader);
+    searcher = newSearcher(reader);
     TopDocs hits = searcher.search(query, 10);
     assertEquals(2, hits.totalHits);
     QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
@@ -238,7 +247,7 @@ public class HighlighterTest extends Bas
    */
   private String highlightField(Query query, String fieldName, String text)
       throws IOException, InvalidTokenOffsetsException {
-    TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)
+    TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)
         .tokenStream(fieldName, new StringReader(text));
     // Assuming "<B>", "</B>" used to highlight
     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
@@ -413,7 +422,7 @@ public class HighlighterTest extends Bas
   
   public void testSpanRegexQuery() throws Exception {
     query = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(new RegexpQuery(new Term(FIELD_NAME, "ken.*"))));
-    searcher = new IndexSearcher(reader);
+    searcher = newSearcher(reader);
     hits = searcher.search(query, 100);
     int maxNumFragmentsRequired = 2;
 
@@ -437,7 +446,7 @@ public class HighlighterTest extends Bas
   
   public void testRegexQuery() throws Exception {
     query = new RegexpQuery(new Term(FIELD_NAME, "ken.*"));
-    searcher = new IndexSearcher(reader);
+    searcher = newSearcher(reader);
     hits = searcher.search(query, 100);
     int maxNumFragmentsRequired = 2;
 
@@ -459,10 +468,35 @@ public class HighlighterTest extends Bas
         numHighlights == 5);
   }
   
+  public void testExternalReader() throws Exception {
+    query = new RegexpQuery(new Term(FIELD_NAME, "ken.*"));
+    searcher = newSearcher(reader);
+    hits = searcher.search(query, 100);
+    int maxNumFragmentsRequired = 2;
+
+    QueryScorer scorer = new QueryScorer(query, reader, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this, scorer);
+    
+    for (int i = 0; i < hits.totalHits; i++) {
+      String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
+
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+
+      String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+          "...");
+      if (VERBOSE) System.out.println("\t" + result);
+    }
+    
+    assertTrue(reader.docFreq(new Term(FIELD_NAME, "hello")) > 0);
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 5);
+  }
+  
   public void testNumericRangeQuery() throws Exception {
     // doesn't currently highlight, but make sure it doesn't cause exception either
     query = NumericRangeQuery.newIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);
-    searcher = new IndexSearcher(reader);
+    searcher = newSearcher(reader);
     hits = searcher.search(query, 100);
     int maxNumFragmentsRequired = 2;
 
@@ -861,7 +895,7 @@ public class HighlighterTest extends Bas
 
     query = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
     ((WildcardQuery)query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
-    searcher = new IndexSearcher(reader);
+    searcher = newSearcher(reader);
     // can't rewrite ConstantScore if you want to highlight it -
     // it rewrites to ConstantScoreQuery which cannot be highlighted
     // query = unReWrittenQuery.rewrite(reader);
@@ -1274,7 +1308,7 @@ public class HighlighterTest extends Bas
   }
 
   public void testMaxSizeHighlight() throws Exception {
-    final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+    final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
     // we disable MockTokenizer checks because we will forcefully limit the 
     // tokenstream and call end() before incrementToken() returns false.
     analyzer.setEnableChecks(false);
@@ -1309,7 +1343,7 @@ public class HighlighterTest extends Bas
         CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("stoppedtoken"));
         // we disable MockTokenizer checks because we will forcefully limit the 
         // tokenstream and call end() before incrementToken() returns false.
-        final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true);
+        final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
         analyzer.setEnableChecks(false);
         TermQuery query = new TermQuery(new Term("data", goodWord));
 
@@ -1360,7 +1394,7 @@ public class HighlighterTest extends Bas
         Highlighter hg = getHighlighter(query, "text", fm);
         hg.setTextFragmenter(new NullFragmenter());
         hg.setMaxDocCharsToAnalyze(36);
-        String match = hg.getBestFragment(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true), "text", text);
+        String match = hg.getBestFragment(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords), "text", text);
         assertTrue(
             "Matched text should contain remainder of text after highlighted query ",
             match.endsWith("in it"));
@@ -1376,8 +1410,8 @@ public class HighlighterTest extends Bas
       public void run() throws Exception {
         numHighlights = 0;
         // test to show how rewritten query can still be used
-        searcher = new IndexSearcher(reader);
-        Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+        searcher = newSearcher(reader);
+        Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
         
         BooleanQuery query = new BooleanQuery();
         query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
@@ -1756,7 +1790,7 @@ public class HighlighterTest extends Bas
   private void searchIndex() throws IOException, InvalidTokenOffsetsException {
     Query query = new TermQuery(new Term("t_text1", "random"));
     IndexReader reader = DirectoryReader.open(dir);
-    IndexSearcher searcher = new IndexSearcher(reader);
+    IndexSearcher searcher = newSearcher(reader);
     // This scorer can return negative idf -> null fragment
     Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
     // This scorer doesn't use idf (patch version)
@@ -1809,7 +1843,7 @@ public class HighlighterTest extends Bas
   }
 
   public void doSearching(Query unReWrittenQuery) throws Exception {
-    searcher = new IndexSearcher(reader);
+    searcher = newSearcher(reader);
     // for any multi-term queries to work (prefix, wildcard, range,fuzzy etc)
     // you must use a rewritten query!
     query = unReWrittenQuery.rewrite(reader);
@@ -1841,11 +1875,11 @@ public class HighlighterTest extends Bas
     super.setUp();
 
     a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
-    analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+    analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
     dir = newDirectory();
     ramDir = newDirectory();
     IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
     for (String text : texts) {
       addDoc(writer, text);
     }

Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java Tue May  7 11:20:55 2013
@@ -89,7 +89,7 @@ public class HighlightCustomQueryTest ex
   private String highlightField(Query query, String fieldName,
       String text) throws IOException, InvalidTokenOffsetsException {
     TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE,
-        true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName,
+        true, MockTokenFilter.ENGLISH_STOPSET).tokenStream(fieldName,
         new StringReader(text));
     // Assuming "<B>", "</B>" used to highlight
     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();

Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Tue May  7 11:20:55 2013
@@ -35,6 +35,7 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -46,8 +47,8 @@ import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 
 @SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
 public class TestPostingsHighlighter extends LuceneTestCase {
@@ -456,7 +457,12 @@ public class TestPostingsHighlighter ext
     iw.close();
     
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter());
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+      @Override
+      protected BreakIterator getBreakIterator(String field) {
+        return new WholeBreakIterator();
+      }
+    };
     Query query = new TermQuery(new Term("body", "test"));
     TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
     assertEquals(1, topDocs.totalHits);
@@ -497,7 +503,7 @@ public class TestPostingsHighlighter ext
     int[] docIDs = new int[2];
     docIDs[0] = hits[0].doc;
     docIDs[1] = hits[1].doc;
-    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 1).get("body");
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 1 }).get("body");
     assertEquals(2, snippets.length);
     assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
     assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
@@ -526,7 +532,7 @@ public class TestPostingsHighlighter ext
     
     IndexSearcher searcher = newSearcher(ir);
 
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter()) {
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
         @Override
         protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
           assert fields.length == 1;
@@ -535,6 +541,11 @@ public class TestPostingsHighlighter ext
           contents[0][0] = text;
           return contents;
         }
+
+        @Override
+        protected BreakIterator getBreakIterator(String field) {
+          return new WholeBreakIterator();
+        }
       };
 
     Query query = new TermQuery(new Term("body", "test"));
@@ -571,7 +582,7 @@ public class TestPostingsHighlighter ext
     PostingsHighlighter highlighter = new PostingsHighlighter();
     Query query = new TermQuery(new Term("body", "highlighting"));
     int[] docIDs = new int[] {0};
-    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
     assertEquals(1, snippets.length);
     assertEquals("test this is.  another sentence this test has.  ", snippets[0]);
 
@@ -607,7 +618,7 @@ public class TestPostingsHighlighter ext
       };
     Query query = new TermQuery(new Term("body", "highlighting"));
     int[] docIDs = new int[] {0};
-    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
     assertEquals(1, snippets.length);
     assertNull(snippets[0]);
 
@@ -635,10 +646,15 @@ public class TestPostingsHighlighter ext
     iw.close();
     
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter());
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+      @Override
+      protected BreakIterator getBreakIterator(String field) {
+        return new WholeBreakIterator();
+      }
+    };
     Query query = new TermQuery(new Term("body", "highlighting"));
     int[] docIDs = new int[] {0};
-    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
     assertEquals(1, snippets.length);
     assertEquals("test this is.  another sentence this test has.  far away is that planet.", snippets[0]);
 
@@ -669,7 +685,7 @@ public class TestPostingsHighlighter ext
     PostingsHighlighter highlighter = new PostingsHighlighter();
     Query query = new TermQuery(new Term("bogus", "highlighting"));
     int[] docIDs = new int[] {0};
-    String snippets[] = highlighter.highlightFields(new String[] {"bogus"}, query, searcher, docIDs, 2).get("bogus");
+    String snippets[] = highlighter.highlightFields(new String[] {"bogus"}, query, searcher, docIDs, new int[] { 2 }).get("bogus");
     assertEquals(1, snippets.length);
     assertNull(snippets[0]);
 
@@ -705,7 +721,7 @@ public class TestPostingsHighlighter ext
     Query query = new TermQuery(new Term("body", "highlighting"));
     int[] docIDs = new int[1];
     docIDs[0] = docID;
-    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
     assertEquals(1, snippets.length);
     assertEquals("   ", snippets[0]);
 
@@ -741,11 +757,132 @@ public class TestPostingsHighlighter ext
     Query query = new TermQuery(new Term("body", "highlighting"));
     int[] docIDs = new int[1];
     docIDs[0] = docID;
-    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");
+    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
     assertEquals(1, snippets.length);
     assertNull(snippets[0]);
 
     ir.close();
     dir.close();
   }
+
+  public void testMultipleDocs() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+
+    int numDocs = atLeast(100);
+    for(int i=0;i<numDocs;i++) {
+      Document doc = new Document();
+      String content = "the answer is " + i;
+      if ((i & 1) == 0) {
+        content += " some more terms";
+      }
+      doc.add(new Field("body", content, offsetsType));
+      doc.add(newStringField("id", ""+i, Field.Store.YES));
+      iw.addDocument(doc);
+
+      if (random().nextInt(10) == 2) {
+        iw.commit();
+      }
+    }
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    Query query = new TermQuery(new Term("body", "answer"));
+    TopDocs hits = searcher.search(query, numDocs);
+    assertEquals(numDocs, hits.totalHits);
+
+    String snippets[] = highlighter.highlight("body", query, searcher, hits);
+    assertEquals(numDocs, snippets.length);
+    for(int hit=0;hit<numDocs;hit++) {
+      StoredDocument doc = searcher.doc(hits.scoreDocs[hit].doc);
+      int id = Integer.parseInt(doc.get("id"));
+      String expected = "the <b>answer</b> is " + id;
+      if ((id  & 1) == 0) {
+        expected += " some more terms";
+      }
+      assertEquals(expected, snippets[hit]);
+    }
+
+    ir.close();
+    dir.close();
+  }
+  
+  public void testMultipleSnippetSizes() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Field title = new Field("title", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    doc.add(title);
+    
+    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+    title.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD);
+    query.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD);
+    Map<String,String[]> snippets = highlighter.highlightFields(new String[] { "title", "body" }, query, searcher, new int[] { 0 }, new int[] { 1, 2 });
+    String titleHighlight = snippets.get("title")[0];
+    String bodyHighlight = snippets.get("body")[0];
+    assertEquals("This is a <b>test</b>. ", titleHighlight);
+    assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodyHighlight);
+    ir.close();
+    dir.close();
+  }
+  
+  public void testEncode() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", "", offsetsType);
+    Document doc = new Document();
+    doc.add(body);
+    
+    body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter() {
+      @Override
+      protected PassageFormatter getFormatter(String field) {
+        return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
+      }
+    };
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
+    assertEquals(1, snippets.length);
+    assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
+    
+    ir.close();
+    dir.close();
+  }
 }

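The getFormatter hook exercised in testEncode above also allows per-field
formatting. A sketch (the field names are hypothetical; the four-argument
DefaultPassageFormatter constructor is the one used in testEncode, read here as
pre-tag, post-tag, ellipsis, and an escape toggle):

    PostingsHighlighter highlighter = new PostingsHighlighter() {
      @Override
      protected PassageFormatter getFormatter(String field) {
        if ("body".equals(field)) {
          // escape raw HTML stored in the body field
          return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
        }
        return super.getFormatter(field);  // cached default for other fields
      }
    };
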
Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Tue May  7 11:20:55 2013
@@ -18,9 +18,7 @@ package org.apache.lucene.search.posting
  */
 
 import java.io.IOException;
-import java.text.BreakIterator;
 import java.util.HashSet;
-import java.util.Locale;
 import java.util.Random;
 
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -30,10 +28,10 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -45,8 +43,8 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util._TestUtil;
 
 @SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
 public class TestPostingsHighlighterRanking extends LuceneTestCase {
@@ -75,7 +73,7 @@ public class TestPostingsHighlighterRank
     document.add(id);
     document.add(body);
     
-    for (int i = 0; i < numDocs; i++) {;
+    for (int i = 0; i < numDocs; i++) {
       StringBuilder bodyText = new StringBuilder();
       int numSentences = _TestUtil.nextInt(random(), 1, maxNumSentences);
       for (int j = 0; j < numSentences; j++) {
@@ -112,16 +110,24 @@ public class TestPostingsHighlighterRank
   
   private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
     for (int n = 1; n < maxTopN; n++) {
-      FakePassageFormatter f1 = new FakePassageFormatter();
-      PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1, 
-                                                       BreakIterator.getSentenceInstance(Locale.ROOT), 
-                                                       new PassageScorer(),
-                                                       f1);
-      FakePassageFormatter f2 = new FakePassageFormatter();
-      PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1, 
-                                                       BreakIterator.getSentenceInstance(Locale.ROOT), 
-                                                       new PassageScorer(),
-                                                       f2);
+      final FakePassageFormatter f1 = new FakePassageFormatter();
+      PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
+          @Override
+          protected PassageFormatter getFormatter(String field) {
+            assertEquals("body", field);
+            return f1;
+          }
+        };
+
+      final FakePassageFormatter f2 = new FakePassageFormatter();
+      PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
+          @Override
+          protected PassageFormatter getFormatter(String field) {
+            assertEquals("body", field);
+            return f2;
+          }
+        };
+
       BooleanQuery bq = new BooleanQuery(false);
       bq.add(query, BooleanClause.Occur.MUST);
       bq.add(new TermQuery(new Term("id", Integer.toString(doc))), BooleanClause.Occur.MUST);
@@ -170,8 +176,7 @@ public class TestPostingsHighlighterRank
         // we use a very simple analyzer. so we can assert the matches are correct
         int lastMatchStart = -1;
         for (int i = 0; i < p.getNumMatches(); i++) {
-          Term term = p.getMatchTerms()[i];
-          assertEquals("body", term.field());
+          BytesRef term = p.getMatchTerms()[i];
           int matchStart = p.getMatchStarts()[i];
           assertTrue(matchStart >= 0);
           // must at least start within the passage
@@ -184,9 +189,8 @@ public class TestPostingsHighlighterRank
           // single character terms
           assertEquals(matchStart+1, matchEnd);
           // and the offsets must be correct...
-          BytesRef bytes = term.bytes();
-          assertEquals(1, bytes.length);
-          assertEquals((char)bytes.bytes[bytes.offset], Character.toLowerCase(content.charAt(matchStart)));
+          assertEquals(1, term.length);
+          assertEquals((char)term.bytes[term.offset], Character.toLowerCase(content.charAt(matchStart)));
         }
         // record just the start/end offset for simplicity
         seen.add(new Pair(p.getStartOffset(), p.getEndOffset()));
@@ -261,10 +265,12 @@ public class TestPostingsHighlighterRank
     iw.close();
     
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, 
-                                             BreakIterator.getSentenceInstance(Locale.ROOT), 
-                                             new PassageScorer(1.2f, 0, 87), 
-                                             new PassageFormatter());
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+        @Override
+        protected PassageScorer getScorer(String field) {
+          return new PassageScorer(1.2f, 0, 87);
+        }
+      };
     Query query = new TermQuery(new Term("body", "test"));
     TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
     assertEquals(1, topDocs.totalHits);
@@ -298,10 +304,12 @@ public class TestPostingsHighlighterRank
     iw.close();
     
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, 
-                                             BreakIterator.getSentenceInstance(Locale.ROOT), 
-                                             new PassageScorer(0, 0.75f, 87), 
-                                             new PassageFormatter());
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+        @Override
+        protected PassageScorer getScorer(String field) {
+          return new PassageScorer(0, 0.75f, 87);
+        }
+      };
     BooleanQuery query = new BooleanQuery();
     query.add(new TermQuery(new Term("body", "foo")), BooleanClause.Occur.SHOULD);
     query.add(new TermQuery(new Term("body", "bar")), BooleanClause.Occur.SHOULD);

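Likewise for scoring: the getScorer hook used in the two tests above can vary
by field. A sketch (the field name is hypothetical; reading the three
PassageScorer arguments as BM25-style k1/b plus a passage-length pivot is an
assumption based on these tests):

    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
      @Override
      protected PassageScorer getScorer(String field) {
        if ("title".equals(field)) {
          return new PassageScorer(1.2f, 0, 87);
        }
        return super.getScorer(field);  // cached default for other fields
      }
    };
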
Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Tue May  7 11:20:55 2013
@@ -29,18 +29,19 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
-import org.apache.lucene.search.highlight.TokenSources;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
 
 public class FastVectorHighlighterTest extends LuceneTestCase {
   
+  
   public void testSimpleHighlightTest() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
@@ -71,9 +72,182 @@ public class FastVectorHighlighterTest e
     dir.close();
   }
   
+  public void testPhraseHighlightLongTextTest() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    Document doc = new Document();
+    FieldType type = new FieldType(TextField.TYPE_STORED);
+    type.setStoreTermVectorOffsets(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectors(true);
+    type.freeze();
+    Field text = new Field("text", 
+        "Netscape was the general name for a series of web browsers originally produced by Netscape Communications Corporation, now a subsidiary of AOL The original browser was once the dominant browser in terms of usage share, but as a result of the first browser war it lost virtually all of its share to Internet Explorer Netscape was discontinued and support for all Netscape browsers and client products was terminated on March 1, 2008 Netscape Navigator was the name of Netscape\u0027s web browser from versions 1.0 through 4.8 The first beta release versions of the browser were released in 1994 and known as Mosaic and then Mosaic Netscape until a legal challenge from the National Center for Supercomputing Applications (makers of NCSA Mosaic, which many of Netscape\u0027s founders used to develop), led to the name change to Netscape Navigator The company\u0027s name also changed from Mosaic Communications Corporation to Netscape Communications Corporation The browser was eas
 ily the most advanced...", type);
+    doc.add(text);
+    writer.addDocument(doc);
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    int docId = 0;
+    String field = "text";
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field, "internet")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "explorer")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 128, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("first browser war it lost virtually all of its share to <b>Internet</b> <b>Explorer</b> Netscape was discontinued and support for all Netscape browsers", bestFragments[0]);
+    }
+    
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "internet"));
+      query.add(new Term(field, "explorer"));
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 128, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("first browser war it lost virtually all of its share to <b>Internet Explorer</b> Netscape was discontinued and support for all Netscape browsers", bestFragments[0]);
+    }
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+  
+  // see LUCENE-4899
+  public void testPhraseHighlightTest() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    Document doc = new Document();
+    FieldType type = new FieldType(TextField.TYPE_STORED);
+    type.setStoreTermVectorOffsets(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectors(true);
+    type.freeze();
+    Field longTermField = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type);
+    Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);
+
+    doc.add(longTermField);
+    doc.add(noLongTermField);
+    writer.addDocument(doc);
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    int docId = 0;
+    String field = "no_long_term";
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field, "test")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("<b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+    }
+    {
+      BooleanQuery query = new BooleanQuery();
+      PhraseQuery pq = new PhraseQuery();
+      pq.add(new Term(field, "test"));
+      pq.add(new Term(field, "foo"));
+      pq.add(new Term(field, "highlighed"));
+      pq.setSlop(5);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(pq, Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // the sloppy phrase match does not fit into an 18-char fragment, so nothing is returned
+      assertEquals(0, bestFragments.length);
+      bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 30, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+      
+    }
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "test"));
+      query.add(new Term(field, "foo"));
+      query.add(new Term(field, "highlighed"));
+      query.setSlop(3);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // the phrase match is wider than 18 chars, so no fragment is created
+      assertEquals(0, bestFragments.length);
+      bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 30, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+      
+    }
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "test"));
+      query.add(new Term(field, "foo"));
+      query.add(new Term(field, "highlighted"));
+      query.setSlop(30);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      assertEquals(0, bestFragments.length);
+    }
+    {
+      BooleanQuery query = new BooleanQuery();
+      PhraseQuery pq = new PhraseQuery();
+      pq.add(new Term(field, "test"));
+      pq.add(new Term(field, "foo"));
+      pq.add(new Term(field, "highlighed"));
+      pq.setSlop(5);
+      BooleanQuery inner = new BooleanQuery();
+      inner.add(pq, Occur.MUST);
+      inner.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(inner, Occur.MUST);
+      query.add(pq, Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      assertEquals(0, bestFragments.length);
+      
+      bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 30, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
+    }
+    
+    field = "long_term";
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field,
+          "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered
+      assertEquals(1, bestFragments.length);
+      assertEquals("<b>thisisaverylongwordandmakessurethisfails</b>",
+          bestFragments[0]);
+    }
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+  
   public void testCommonTermsQueryHighlightTest() throws IOException {
     Directory dir = newDirectory();
-    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,  new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,  new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
     FieldType type = new FieldType(TextField.TYPE_STORED);
     type.setStoreTermVectorOffsets(true);
     type.setStoreTermVectorPositions(true);
@@ -98,7 +272,7 @@ public class FastVectorHighlighterTest e
    
     FastVectorHighlighter highlighter = new FastVectorHighlighter();
     IndexReader reader = DirectoryReader.open(writer, true);
-    IndexSearcher searcher = new IndexSearcher(reader);
+    IndexSearcher searcher = newSearcher(reader);
     TopDocs hits = searcher.search(query, 10);
     assertEquals(2, hits.totalHits);
     FieldQuery fieldQuery  = highlighter.getFieldQuery(query, reader);
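
One requirement all of the new FastVectorHighlighter tests above share: the
highlighted field must be indexed with term vectors carrying both positions
and offsets. A sketch of the setup, mirroring the tests (content, query,
doc, reader and docId are assumed):

    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectors(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectorOffsets(true);
    type.freeze();
    doc.add(new Field("text", content, type));

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
    // 100-char fragments, at most 1 returned; an empty array means no
    // fragment of that size could cover a match (see the slop tests above)
    String[] fragments = highlighter.getBestFragments(fieldQuery, reader,
        docId, "text", 100, 1);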

Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java Tue May  7 11:20:55 2013
@@ -42,7 +42,7 @@ public class SimpleFragListBuilderTest e
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
     FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), sflb.minFragCharSize );
     assertEquals( 1, ffl.getFragInfos().size() );
-    assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,18)", ffl.getFragInfos().get( 0 ).toString() );
+    assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
   }
   
   public void testSmallerFragSizeThanPhraseQuery() throws Exception {
@@ -55,7 +55,7 @@ public class SimpleFragListBuilderTest e
     FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh   jklmnopqrs" ), sflb.minFragCharSize );
     assertEquals( 1, ffl.getFragInfos().size() );
     if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
-    assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(1,19)", ffl.getFragInfos().get( 0 ).toString() );
+    assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
   }
   
   public void test1TermIndex() throws Exception {
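
The two assertion updates above encode a behavior change in
SimpleFragListBuilder: a fragment is no longer clipped to the requested
fragment size when a single match is longer than that size; it now covers
the whole match ((0,19) instead of (0,18), and (0,21) instead of (1,19)). A
hedged sketch, assuming fpl(...) builds a FieldPhraseList the way the test
helper above does:

    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList(
        fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")),
            "abcdefghijklmnopqrs"),
        sflb.minFragCharSize);
    // the single fragment now spans the full 19-char term, not just 18 chars
    assertEquals(1, ffl.getFragInfos().size());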

Modified: lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java (original)
+++ lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java Tue May  7 11:20:55 2013
@@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.ComplexExplanation;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
@@ -192,12 +193,22 @@ class TermsIncludingScoreQuery extends Q
     DocsEnum docsEnum;
     DocsEnum reuse;
     int scoreUpto;
+    int doc;
 
     SVInnerScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, long cost) {
       super(weight);
       this.acceptDocs = acceptDocs;
       this.termsEnum = termsEnum;
       this.cost = cost;
+      this.doc = -1;
+    }
+
+    @Override
+    public void score(Collector collector) throws IOException {
+      collector.setScorer(this);
+      for (int doc = nextDocOutOfOrder(); doc != NO_MORE_DOCS; doc = nextDocOutOfOrder()) {
+        collector.collect(doc);
+      }
     }
 
     @Override
@@ -211,23 +222,22 @@ class TermsIncludingScoreQuery extends Q
 
     @Override
     public int docID() {
-      return docsEnum != null ? docsEnum.docID() : DocIdSetIterator.NO_MORE_DOCS;
+      return doc;
     }
 
-    @Override
-    public int nextDoc() throws IOException {
+    int nextDocOutOfOrder() throws IOException {
       if (docsEnum != null) {
         int docId = docsEnum.nextDoc();
         if (docId == DocIdSetIterator.NO_MORE_DOCS) {
           docsEnum = null;
         } else {
-          return docId;
+          return doc = docId;
         }
       }
 
       do {
         if (upto == terms.size()) {
-          return DocIdSetIterator.NO_MORE_DOCS;
+          return doc = DocIdSetIterator.NO_MORE_DOCS;
         }
 
         scoreUpto = upto;
@@ -236,7 +246,12 @@ class TermsIncludingScoreQuery extends Q
         }
       } while (docsEnum == null);
 
-      return docsEnum.nextDoc();
+      return doc = docsEnum.nextDoc();
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      throw new UnsupportedOperationException("nextDoc() isn't supported because doc ids are emitted out of order");
     }
 
     @Override
@@ -247,7 +262,7 @@ class TermsIncludingScoreQuery extends Q
     private int advanceForExplainOnly(int target) throws IOException {
       int docId;
       do {
-        docId = nextDoc();
+        docId = nextDocOutOfOrder();
         if (docId < target) {
           int tempDocId = docsEnum.advance(target);
           if (tempDocId == target) {
@@ -286,7 +301,7 @@ class TermsIncludingScoreQuery extends Q
     }
 
     @Override
-    public int nextDoc() throws IOException {
+    int nextDocOutOfOrder() throws IOException {
       if (docsEnum != null) {
         int docId;
         do {
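
The refactoring above renames the iteration method to make its contract
explicit: SVInnerScorer emits docIDs out of order (it walks the collected
terms, then each term's postings), so it can only drive a Collector directly
and must refuse ordered iteration. A hedged sketch of the pattern:

    // assumes a subclass supplies nextDocOutOfOrder(), which may revisit
    // earlier docIDs and returns NO_MORE_DOCS when exhausted
    abstract class OutOfOrderScorer extends Scorer {
      protected OutOfOrderScorer(Weight weight) {
        super(weight);
      }

      abstract int nextDocOutOfOrder() throws IOException;

      @Override
      public void score(Collector collector) throws IOException {
        collector.setScorer(this);
        for (int doc = nextDocOutOfOrder(); doc != NO_MORE_DOCS; doc = nextDocOutOfOrder()) {
          collector.collect(doc);  // collector must tolerate out-of-order docIDs
        }
      }

      @Override
      public int nextDoc() {
        throw new UnsupportedOperationException("docIDs are emitted out of order");
      }
    }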

Modified: lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Tue May  7 11:20:55 2013
@@ -339,7 +339,8 @@ public class ToChildBlockJoinQuery exten
       final ToChildBlockJoinQuery other = (ToChildBlockJoinQuery) _other;
       return origParentQuery.equals(other.origParentQuery) &&
         parentsFilter.equals(other.parentsFilter) &&
-        doScores == other.doScores;
+        doScores == other.doScores &&
+        super.equals(other);
     } else {
       return false;
     }
@@ -348,7 +349,7 @@ public class ToChildBlockJoinQuery exten
   @Override
   public int hashCode() {
     final int prime = 31;
-    int hash = 1;
+    int hash = super.hashCode();
     hash = prime * hash + origParentQuery.hashCode();
     hash = prime * hash + new Boolean(doScores).hashCode();
     hash = prime * hash + parentsFilter.hashCode();
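
The equals/hashCode changes here (and the matching ones in
ToParentBlockJoinQuery below) follow the standard pattern for Query
subclasses: delegate to super so the boost participates in equality, and
seed hashCode from super.hashCode() so the two methods stay consistent. A
generic sketch (MyJoinQuery and wrappedQuery are placeholder names):

    @Override
    public boolean equals(Object other) {
      if (other instanceof MyJoinQuery) {
        MyJoinQuery q = (MyJoinQuery) other;
        return wrappedQuery.equals(q.wrappedQuery)
            && super.equals(q);  // Query.equals also compares the boost
      }
      return false;
    }

    @Override
    public int hashCode() {
      // start from super.hashCode() so the boost is mixed in, matching equals
      return 31 * super.hashCode() + wrappedQuery.hashCode();
    }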

Modified: lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (original)
+++ lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java Tue May  7 11:20:55 2013
@@ -80,7 +80,7 @@ public class ToParentBlockJoinCollector 
 
   // Maps each BlockJoinQuery instance to its "slot" in
   // joinScorers and in OneGroup's cached doc/scores/count:
-  private final Map<Query,Integer> joinQueryID = new HashMap<Query,Integer>();
+  private final Map<Query,Integer> joinQueryID = new HashMap<>();
   private final int numParentHits;
   private final FieldValueHitQueue<OneGroup> queue;
   private final FieldComparator[] comparators;
@@ -111,6 +111,7 @@ public class ToParentBlockJoinCollector 
     if (trackMaxScore) {
       maxScore = Float.MIN_VALUE;
     }
+    //System.out.println("numParentHits=" + numParentHits);
     this.trackScores = trackScores;
     this.numParentHits = numParentHits;
     queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
@@ -122,6 +123,7 @@ public class ToParentBlockJoinCollector 
   private static final class OneGroup extends FieldValueHitQueue.Entry {
     public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
       super(comparatorSlot, parentDoc, parentScore);
+      //System.out.println("make OneGroup parentDoc=" + parentDoc);
       docs = new int[numJoins][];
       for(int joinID=0;joinID<numJoins;joinID++) {
         docs[joinID] = new int[5];
@@ -138,11 +140,11 @@ public class ToParentBlockJoinCollector 
     int[][] docs;
     float[][] scores;
     int[] counts;
-  };
+  }
 
   @Override
   public void collect(int parentDoc) throws IOException {
-    //System.out.println("C parentDoc=" + parentDoc);
+    //System.out.println("\nC parentDoc=" + parentDoc);
     totalHitCount++;
 
     float score = Float.NaN;
@@ -203,8 +205,7 @@ public class ToParentBlockJoinCollector 
       for (int i = 0; i < comparators.length; i++) {
         comparators[i].copy(comparatorSlot, parentDoc);
       }
-      //System.out.println("  startup: new OG doc=" +
-      //(docBase+parentDoc));
+      //System.out.println("  startup: new OG doc=" + (docBase+parentDoc));
       if (!trackMaxScore && trackScores) {
         score = scorer.score();
       }
@@ -241,22 +242,28 @@ public class ToParentBlockJoinCollector 
       og.scores = ArrayUtil.grow(og.scores);
     }
 
-    //System.out.println("copyGroups parentDoc=" + og.doc);
+    //System.out.println("\ncopyGroups parentDoc=" + og.doc);
     for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
       final ToParentBlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
       //System.out.println("  scorer=" + joinScorer);
-      if (joinScorer != null) {
+      if (joinScorer != null && docBase + joinScorer.getParentDoc() == og.doc) {
         og.counts[scorerIDX] = joinScorer.getChildCount();
         //System.out.println("    count=" + og.counts[scorerIDX]);
         og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]);
+        assert og.docs[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.docs[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
+        //System.out.println("    len=" + og.docs[scorerIDX].length);
         /*
-        for(int idx=0;idx<og.counts[scorerIDX];idx++) {
+          for(int idx=0;idx<og.counts[scorerIDX];idx++) {
           System.out.println("    docs[" + idx + "]=" + og.docs[scorerIDX][idx]);
-        }
+          }
         */
         if (trackScores) {
+          //System.out.println("    copy scores");
           og.scores[scorerIDX] = joinScorer.swapChildScores(og.scores[scorerIDX]);
+          assert og.scores[scorerIDX].length >= og.counts[scorerIDX]: "length=" + og.scores[scorerIDX].length + " vs count=" + og.counts[scorerIDX];
         }
+      } else {
+        og.counts[scorerIDX] = 0;
       }
     }
   }
@@ -302,13 +309,16 @@ public class ToParentBlockJoinCollector 
     Arrays.fill(joinScorers, null);
 
     Queue<Scorer> queue = new LinkedList<Scorer>();
+    //System.out.println("\nqueue: add top scorer=" + scorer);
     queue.add(scorer);
     while ((scorer = queue.poll()) != null) {
+      //System.out.println("  poll: " + scorer + "; " + scorer.getWeight().getQuery());
       if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
         enroll((ToParentBlockJoinQuery) scorer.getWeight().getQuery(), (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
       }
 
       for (ChildScorer sub : scorer.getChildren()) {
+        //System.out.println("  add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
         queue.add(sub.child);
       }
     }
@@ -384,12 +394,8 @@ public class ToParentBlockJoinCollector 
     throws IOException {
 
     final Integer _slot = joinQueryID.get(query);
-    if (_slot == null) {
-      if (totalHitCount == 0) {
-        return null;
-      } else {
-        throw new IllegalArgumentException("the Query did not contain the provided BlockJoinQuery");
-      }
+    if (_slot == null && totalHitCount == 0) {
+      return null;
     }
 
     if (sortedGroups == null) {
@@ -401,7 +407,7 @@ public class ToParentBlockJoinCollector 
       return null;
     }
 
-    return accumulateGroups(_slot, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
+    return accumulateGroups(_slot == null ? -1 : _slot.intValue(), offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
   }
 
   /**
@@ -423,18 +429,26 @@ public class ToParentBlockJoinCollector 
     final FakeScorer fakeScorer = new FakeScorer();
 
     int totalGroupedHitCount = 0;
+    //System.out.println("slot=" + slot);
 
     for(int groupIDX=offset;groupIDX<sortedGroups.length;groupIDX++) {
       final OneGroup og = sortedGroups[groupIDX];
-      final int numChildDocs = og.counts[slot];
+      final int numChildDocs;
+      if (slot == -1 || slot >= og.counts.length) {
+        numChildDocs = 0;
+      } else {
+        numChildDocs = og.counts[slot];
+      }
 
       // Number of documents in group should be bounded to prevent redundant memory allocation
-      final int numDocsInGroup = Math.min(numChildDocs, maxDocsPerGroup);
+      final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
+      //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup);
 
       // At this point we hold all docs w/ in each group,
       // unsorted; we now sort them:
       final TopDocsCollector<?> collector;
       if (withinGroupSort == null) {
+        //System.out.println("sort by score");
         // Sort by score
         if (!trackScores) {
           throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false");
@@ -448,6 +462,7 @@ public class ToParentBlockJoinCollector 
       collector.setScorer(fakeScorer);
       collector.setNextReader(og.readerContext);
       for(int docIDX=0;docIDX<numChildDocs;docIDX++) {
+        //System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
         final int doc = og.docs[slot][docIDX];
         fakeScorer.doc = doc;
         if (trackScores) {
@@ -470,7 +485,7 @@ public class ToParentBlockJoinCollector 
 
       final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);
 
-      groups[groupIDX-offset] = new GroupDocs<Integer>(og.score,
+      groups[groupIDX-offset] = new GroupDocs<>(og.score,
                                                        topDocs.getMaxScore(),
                                                        numChildDocs,
                                                        topDocs.scoreDocs,
@@ -478,9 +493,9 @@ public class ToParentBlockJoinCollector 
                                                        groupSortValues);
     }
 
-    return new TopGroups<Integer>(new TopGroups<Integer>(sort.getSort(),
-                                                         withinGroupSort == null ? null : withinGroupSort.getSort(),
-                                                         0, totalGroupedHitCount, groups, maxScore),
+    return new TopGroups<>(new TopGroups<>(sort.getSort(),
+                                                       withinGroupSort == null ? null : withinGroupSort.getSort(),
+                                                       0, totalGroupedHitCount, groups, maxScore),
                                   totalHitCount);
   }
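
Two user-visible effects of the collector changes above: copyGroups now
zeroes a scorer's child count when its current parent doc is not the group
being saved, and getTopGroups returns groups with zero child docs, instead
of throwing IllegalArgumentException, when the given join query never
matched. A hedged usage sketch (parentQuery and childJoinQuery are assumed,
with childJoinQuery a ToParentBlockJoinQuery clause inside parentQuery):

    ToParentBlockJoinCollector collector =
        new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
    searcher.search(parentQuery, collector);
    // null withinGroupSort sorts children by score (requires trackScores=true)
    TopGroups<Integer> groups =
        collector.getTopGroups(childJoinQuery, null, 0, 10, 0, true);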
 

Modified: lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/branches/lucene4258/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Tue May  7 11:20:55 2013
@@ -181,8 +181,7 @@ public class ToParentBlockJoinQuery exte
       // acceptDocs when we score:
       final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
 
-      if (parents == null
-          || parents.iterator().docID() == DocIdSetIterator.NO_MORE_DOCS) { // <-- means DocIdSet#EMPTY_DOCIDSET
+      if (parents == null) {
         // No matches
         return null;
       }
@@ -196,10 +195,8 @@ public class ToParentBlockJoinQuery exte
     @Override
     public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
       BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, true, false, context.reader().getLiveDocs());
-      if (scorer != null) {
-        if (scorer.advance(doc) == doc) {
-          return scorer.explain(context.docBase);
-        }
+      if (scorer != null && scorer.advance(doc) == doc) {
+        return scorer.explain(context.docBase);
       }
       return new ComplexExplanation(false, 0.0f, "Not a match");
     }
@@ -247,6 +244,10 @@ public class ToParentBlockJoinQuery exte
       return childDocUpto;
     }
 
+    int getParentDoc() {
+      return parentDoc;
+    }
+
     int[] swapChildDocs(int[] other) {
       final int[] ret = pendingChildDocs;
       if (other == null) {
@@ -273,7 +274,6 @@ public class ToParentBlockJoinQuery exte
     @Override
     public int nextDoc() throws IOException {
       //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
-
       // Loop until we hit a parentDoc that's accepted
       while (true) {
         if (nextChildDoc == NO_MORE_DOCS) {
@@ -286,6 +286,12 @@ public class ToParentBlockJoinQuery exte
 
         parentDoc = parentBits.nextSetBit(nextChildDoc);
 
+        // Parent & child docs are supposed to be
+        // orthogonal:
+        if (nextChildDoc == parentDoc) {
+          throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+        }
+
         //System.out.println("  parentDoc=" + parentDoc);
         assert parentDoc != -1;
 
@@ -296,6 +302,13 @@ public class ToParentBlockJoinQuery exte
           do {
             nextChildDoc = childScorer.nextDoc();
           } while (nextChildDoc < parentDoc);
+
+          // Parent & child docs are supposed to be
+          // orthogonal:
+          if (nextChildDoc == parentDoc) {
+            throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+          }
+
           continue;
         }
 
@@ -327,8 +340,11 @@ public class ToParentBlockJoinQuery exte
           nextChildDoc = childScorer.nextDoc();
         } while (nextChildDoc < parentDoc);
 
-        // Parent & child docs are supposed to be orthogonal:
-        assert nextChildDoc != parentDoc;
+        // Parent & child docs are supposed to be
+        // orthogonal:
+        if (nextChildDoc == parentDoc) {
+          throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+        }
 
         switch(scoreMode) {
         case Avg:
@@ -344,7 +360,7 @@ public class ToParentBlockJoinQuery exte
           break;
         }
 
-        //System.out.println("  return parentDoc=" + parentDoc);
+        //System.out.println("  return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
         return parentDoc;
       }
     }
@@ -394,7 +410,9 @@ public class ToParentBlockJoinQuery exte
       }
 
       // Parent & child docs are supposed to be orthogonal:
-      assert nextChildDoc != prevParentDoc;
+      if (nextChildDoc == prevParentDoc) {
+        throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass());
+      }
 
       final int nd = nextDoc();
       //System.out.println("  return nextParentDoc=" + nd);
@@ -446,7 +464,8 @@ public class ToParentBlockJoinQuery exte
       final ToParentBlockJoinQuery other = (ToParentBlockJoinQuery) _other;
       return origChildQuery.equals(other.origChildQuery) &&
         parentsFilter.equals(other.parentsFilter) &&
-        scoreMode == other.scoreMode;
+        scoreMode == other.scoreMode && 
+        super.equals(other);
     } else {
       return false;
     }
@@ -455,17 +474,10 @@ public class ToParentBlockJoinQuery exte
   @Override
   public int hashCode() {
     final int prime = 31;
-    int hash = 1;
+    int hash = super.hashCode();
     hash = prime * hash + origChildQuery.hashCode();
     hash = prime * hash + scoreMode.hashCode();
     hash = prime * hash + parentsFilter.hashCode();
     return hash;
   }
-
-  @Override
-  public ToParentBlockJoinQuery clone() {
-    return new ToParentBlockJoinQuery(origChildQuery.clone(),
-                              parentsFilter,
-                              scoreMode);
-  }
 }
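
The new IllegalStateException replaces a bare assert: a child query matching
a parent document indicates a malformed index or filter on the caller's
side, so it should fail even without assertions enabled. The invariant comes
from how parent/child blocks are indexed: all children first, the parent
last, added atomically. A hedged indexing sketch (field names and values are
illustrative):

    List<Document> block = new ArrayList<Document>();
    for (String sku : new String[] {"sku1", "sku2"}) {
      Document child = new Document();
      child.add(new StringField("sku", sku, Field.Store.NO));
      block.add(child);
    }
    Document parent = new Document();
    parent.add(new StringField("docType", "product", Field.Store.NO));
    block.add(parent);           // the parent must be last in the block
    writer.addDocuments(block);  // children + parent stored contiguously

    // the parents filter must match only parent docs; if the child query
    // also matches a parent doc, ToParentBlockJoinQuery now throws:
    Filter parentsFilter = new CachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("docType", "product"))));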