Posted to commits@lucene.apache.org by cp...@apache.org on 2017/05/25 17:55:21 UTC

[12/44] lucene-solr:jira/solr-8668: LUCENE-7815: Removed the PostingsHighlighter

LUCENE-7815: Removed the PostingsHighlighter


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0d3c73ea
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0d3c73ea
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0d3c73ea

Branch: refs/heads/jira/solr-8668
Commit: 0d3c73eaa2dd26af73461fd6ec3494bc12edbe8a
Parents: 14320a5
Author: David Smiley <ds...@apache.org>
Authored: Tue May 23 14:39:51 2017 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Tue May 23 14:39:51 2017 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |    4 +
 lucene/benchmark/conf/highlighters-postings.alg |    4 +-
 .../tasks/SearchTravRetHighlightTask.java       |   30 -
 .../DefaultPassageFormatter.java                |  137 --
 .../MultiTermHighlighting.java                  |  282 -----
 .../search/postingshighlight/Passage.java       |  159 ---
 .../postingshighlight/PassageFormatter.java     |   40 -
 .../search/postingshighlight/PassageScorer.java |  104 --
 .../postingshighlight/PostingsHighlighter.java  |  820 ------------
 .../search/postingshighlight/package-info.java  |   21 -
 .../CustomSeparatorBreakIterator.java           |  150 +++
 .../search/uhighlight/WholeBreakIterator.java   |  116 ++
 .../search/postingshighlight/CambridgeMA.utf8   |    1 -
 .../TestMultiTermHighlighting.java              |  884 -------------
 .../TestPostingsHighlighter.java                | 1185 ------------------
 .../TestPostingsHighlighterRanking.java         |  324 -----
 .../uhighlight/LengthGoalBreakIteratorTest.java |    1 -
 .../TestCustomSeparatorBreakIterator.java       |  114 ++
 .../uhighlight/TestUnifiedHighlighter.java      |    1 -
 .../uhighlight/TestWholeBreakIterator.java      |  134 ++
 .../solr/highlight/UnifiedSolrHighlighter.java  |    4 +-
 21 files changed, 522 insertions(+), 3993 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3951cea..dadba8b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -53,6 +53,10 @@ API Changes
 * LUCENE-7741: DoubleValuesSource now has an explain() method (Alan Woodward,
   Adrien Grand)
 
+* LUCENE-7815: Removed the PostingsHighlighter; you should use the UnifiedHighlighter
+  instead, which was derived from the PH.  WholeBreakIterator and
+  CustomSeparatorBreakIterator were moved to UH's package. (David Smiley)
+
 Bug Fixes
 
 * LUCENE-7626: IndexWriter will no longer accept broken token offsets
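
For reference, the replacement path the LUCENE-7815 entry above points at is
close to a drop-in rename at the call site. A minimal migration sketch (not
part of this commit; assumes the Lucene 7.x UnifiedHighlighter API, with
searcher, indexAnalyzer, query and topDocs standing in for objects the caller
already has):

    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxLength(10000); // counterpart of PostingsHighlighter's maxLength
    // the same offsets-in-the-index requirement applies; one snippet per hit
    String[] snippets = highlighter.highlight("body", query, topDocs);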

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/benchmark/conf/highlighters-postings.alg
----------------------------------------------------------------------
diff --git a/lucene/benchmark/conf/highlighters-postings.alg b/lucene/benchmark/conf/highlighters-postings.alg
index 610908f..2560dad 100644
--- a/lucene/benchmark/conf/highlighters-postings.alg
+++ b/lucene/benchmark/conf/highlighters-postings.alg
@@ -38,7 +38,7 @@ file.query.maker.file=conf/query-terms.txt
 log.queries=false
 log.step.SearchTravRetHighlight=-1
 
-highlighter=HlImpl:NONE:SH_A:UH_A:PH_P:UH_P:UH_PV
+highlighter=HlImpl:NONE:SH_A:UH_A:UH_P:UH_PV
 
 { "Populate"
         CreateIndex
@@ -60,6 +60,6 @@ highlighter=HlImpl:NONE:SH_A:UH_A:PH_P:UH_P:UH_PV
         CloseReader
 
         NewRound
-} : 6
+} : 5
 
 RepSumByPrefRound HL
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java
----------------------------------------------------------------------
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java
index f36854d..d90d3a7 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetHighlightTask.java
@@ -42,7 +42,6 @@ import org.apache.lucene.search.highlight.Highlighter;
 import org.apache.lucene.search.highlight.QueryScorer;
 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
 import org.apache.lucene.search.highlight.TokenSources;
-import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
 import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
 import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
 import org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner;
@@ -133,8 +132,6 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
       case "UH_P": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS); break;
       case "UH_PV": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS_WITH_TERM_VECTORS); break;
 
-      case "PH_P": hlImpl = new PostingsHLImpl(); break;
-
       default: throw new Exception("unrecognized highlighter type: " + type + " (try 'UH')");
     }
   }
@@ -224,33 +221,6 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
     return clone;
   }
 
-  private class PostingsHLImpl implements HLImpl {
-    PostingsHighlighter highlighter;
-    String[] fields = hlFields.toArray(new String[hlFields.size()]);
-    int[] maxPassages;
-    PostingsHLImpl() {
-      highlighter = new PostingsHighlighter(maxDocCharsToAnalyze) {
-        @Override
-        protected Analyzer getIndexAnalyzer(String field) { // thus support wildcards
-          return analyzer;
-        }
-
-        @Override
-        protected BreakIterator getBreakIterator(String field) {
-          return BreakIterator.getSentenceInstance(Locale.ENGLISH);
-        }
-      };
-      maxPassages = new int[hlFields.size()];
-      Arrays.fill(maxPassages, maxFrags);
-    }
-
-    @Override
-    public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
-      Map<String, String[]> result = highlighter.highlightFields(fields, q, searcher, hits, maxPassages);
-      preventOptimizeAway = result.size();
-    }
-  }
-
   private class UnifiedHLImpl implements HLImpl {
     UnifiedHighlighter highlighter;
     IndexSearcher lastSearcher;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java
deleted file mode 100644
index 73822c8..0000000
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.postingshighlight;
-
-/**
- * Creates a formatted snippet from the top passages.
- * <p>
- * The default implementation marks the query terms as bold, and places
- * ellipses between unconnected passages.
- */
-public class DefaultPassageFormatter extends PassageFormatter {
-  /** text that will appear before highlighted terms */
-  protected final String preTag;
-  /** text that will appear after highlighted terms */
-  protected final String postTag;
-  /** text that will appear between two unconnected passages */
-  protected final String ellipsis;
-  /** true if we should escape for html */
-  protected final boolean escape;
-
-  /**
-   * Creates a new DefaultPassageFormatter with the default tags.
-   */
-  public DefaultPassageFormatter() {
-    this("<b>", "</b>", "... ", false);
-  }
-
-  /**
-   * Creates a new DefaultPassageFormatter with custom tags.
-   * @param preTag text which should appear before a highlighted term.
-   * @param postTag text which should appear after a highlighted term.
-   * @param ellipsis text which should be used to connect two unconnected passages.
-   * @param escape true if text should be html-escaped
-   */
-  public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
-    if (preTag == null || postTag == null || ellipsis == null) {
-      throw new NullPointerException();
-    }
-    this.preTag = preTag;
-    this.postTag = postTag;
-    this.ellipsis = ellipsis;
-    this.escape = escape;
-  }
-
-  @Override
-  public String format(Passage passages[], String content) {
-    StringBuilder sb = new StringBuilder();
-    int pos = 0;
-    for (Passage passage : passages) {
-      // don't add ellipsis if it's the first one, or if it's connected.
-      if (passage.startOffset > pos && pos > 0) {
-        sb.append(ellipsis);
-      }
-      pos = passage.startOffset;
-      for (int i = 0; i < passage.numMatches; i++) {
-        int start = passage.matchStarts[i];
-        int end = passage.matchEnds[i];
-        // it's possible to have overlapping terms
-        if (start > pos) {
-          append(sb, content, pos, start);
-        }
-        if (end > pos) {
-          sb.append(preTag);
-          append(sb, content, Math.max(pos, start), end);
-          sb.append(postTag);
-          pos = end;
-        }
-      }
-      // it's possible a "term" from the analyzer could span a sentence boundary.
-      append(sb, content, pos, Math.max(pos, passage.endOffset));
-      pos = passage.endOffset;
-    }
-    return sb.toString();
-  }
-
-  /** 
-   * Appends original text to the response.
-   * @param dest resulting text, possibly transformed or encoded
-   * @param content original text content
-   * @param start index of the first character in content
-   * @param end index of the character following the last character in content
-   */
-  protected void append(StringBuilder dest, String content, int start, int end) {
-    if (escape) {
-      // note: these are the rules from owasp.org
-      for (int i = start; i < end; i++) {
-        char ch = content.charAt(i);
-        switch(ch) {
-          case '&':
-            dest.append("&amp;");
-            break;
-          case '<':
-            dest.append("&lt;");
-            break;
-          case '>':
-            dest.append("&gt;");
-            break;
-          case '"':
-            dest.append("&quot;");
-            break;
-          case '\'':
-            dest.append("&#x27;");
-            break;
-          case '/':
-            dest.append("&#x2F;");
-            break;
-          default:
-            if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
-              dest.append(ch);
-            } else if (ch < 0xff) {
-              dest.append("&#");
-              dest.append((int)ch);
-              dest.append(";");
-            } else {
-              dest.append(ch);
-            }
-        }
-      }
-    } else {
-      dest.append(content, start, end);
-    }
-  }
-}
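
The UnifiedHighlighter package keeps an equivalent formatter
(org.apache.lucene.search.uhighlight.DefaultPassageFormatter). A hedged sketch
of carrying the custom-tag constructor above across, assuming the uhighlight
class retains the same (preTag, postTag, ellipsis, escape) signature:

    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    // escape=true applies the same owasp.org-style HTML escaping as append() above
    highlighter.setFormatter(new org.apache.lucene.search.uhighlight.DefaultPassageFormatter(
        "<em>", "</em>", "... ", true));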

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
deleted file mode 100644
index c9733d3..0000000
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.postingshighlight;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.List;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.AutomatonQuery;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.ConstantScoreQuery;
-import org.apache.lucene.search.DisjunctionMaxQuery;
-import org.apache.lucene.search.FuzzyQuery;
-import org.apache.lucene.search.PrefixQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanNotQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanPositionCheckQuery;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CharsRef;
-import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
-import org.apache.lucene.util.automaton.LevenshteinAutomata;
-import org.apache.lucene.util.automaton.Operations;
-
-/**
- * Support for highlighting multiterm queries in PostingsHighlighter.
- */
-class MultiTermHighlighting {
-  
-  /** 
-   * Extracts all MultiTermQueries for {@code field}, and returns equivalent 
-   * automata that will match terms.
-   */
-  static CharacterRunAutomaton[] extractAutomata(Query query, String field) {
-    List<CharacterRunAutomaton> list = new ArrayList<>();
-    if (query instanceof BooleanQuery) {
-      for (BooleanClause clause : (BooleanQuery) query) {
-        if (!clause.isProhibited()) {
-          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field)));
-        }
-      }
-    } else if (query instanceof ConstantScoreQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field)));
-    } else if (query instanceof DisjunctionMaxQuery) {
-      for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field)));
-      }
-    } else if (query instanceof SpanOrQuery) {
-      for (Query sub : ((SpanOrQuery) query).getClauses()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field)));
-      }
-    } else if (query instanceof SpanNearQuery) {
-      for (Query sub : ((SpanNearQuery) query).getClauses()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field)));
-      }
-    } else if (query instanceof SpanNotQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field)));
-    } else if (query instanceof SpanPositionCheckQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field)));
-    } else if (query instanceof SpanMultiTermQueryWrapper) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field)));
-    } else if (query instanceof PrefixQuery) {
-      final PrefixQuery pq = (PrefixQuery) query;
-      Term prefix = pq.getPrefix();
-      if (prefix.field().equals(field)) {
-        list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), 
-                                                                       Automata.makeAnyString())) {
-          @Override
-          public String toString() {
-            return pq.toString();
-          }
-        });
-      }
-    } else if (query instanceof FuzzyQuery) {
-      final FuzzyQuery fq = (FuzzyQuery) query;
-      if (fq.getField().equals(field)) {
-        String utf16 = fq.getTerm().text();
-        int termText[] = new int[utf16.codePointCount(0, utf16.length())];
-        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
-          termText[j++] = cp = utf16.codePointAt(i);
-        }
-        int termLength = termText.length;
-        int prefixLength = Math.min(fq.getPrefixLength(), termLength);
-        String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
-        LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
-        String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
-        Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
-        list.add(new CharacterRunAutomaton(automaton) {
-          @Override
-          public String toString() {
-            return fq.toString();
-          }
-        });
-      }
-    } else if (query instanceof TermRangeQuery) {
-      final TermRangeQuery tq = (TermRangeQuery) query;
-      if (tq.getField().equals(field)) {
-        final CharsRef lowerBound;
-        if (tq.getLowerTerm() == null) {
-          lowerBound = null;
-        } else {
-          lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
-        }
-        
-        final CharsRef upperBound;
-        if (tq.getUpperTerm() == null) {
-          upperBound = null;
-        } else {
-          upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
-        }
-        
-        final boolean includeLower = tq.includesLower();
-        final boolean includeUpper = tq.includesUpper();
-        final CharsRef scratch = new CharsRef();
-        final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
-        
-        // this is *not* an automaton, but it's very simple
-        list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
-          @Override
-          public boolean run(char[] s, int offset, int length) {
-            scratch.chars = s;
-            scratch.offset = offset;
-            scratch.length = length;
-            
-            if (lowerBound != null) {
-              int cmp = comparator.compare(scratch, lowerBound);
-              if (cmp < 0 || (!includeLower && cmp == 0)) {
-                return false;
-              }
-            }
-            
-            if (upperBound != null) {
-              int cmp = comparator.compare(scratch, upperBound);
-              if (cmp > 0 || (!includeUpper && cmp == 0)) {
-                return false;
-              }
-            }
-            return true;
-          }
-
-          @Override
-          public String toString() {
-            return tq.toString();
-          }
-        });
-      }
-    } else if (query instanceof AutomatonQuery) {
-      final AutomatonQuery aq = (AutomatonQuery) query;
-      if (aq.getField().equals(field)) {
-        list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
-          @Override
-          public String toString() {
-            return aq.toString();
-          }
-        });
-      }
-    }
-    return list.toArray(new CharacterRunAutomaton[list.size()]);
-  }
-  
-  /** 
-   * Returns a "fake" DocsAndPositionsEnum over the tokenstream, returning offsets where {@code matchers}
-   * matches tokens.
-   * <p>
-   * This is solely used internally by PostingsHighlighter: <b>DO NOT USE THIS METHOD!</b>
-   */
-  static PostingsEnum getDocsEnum(final TokenStream ts, final CharacterRunAutomaton[] matchers) throws IOException {
-    final CharTermAttribute charTermAtt = ts.addAttribute(CharTermAttribute.class);
-    final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
-    ts.reset();
-    
-    // TODO: we could use CachingWrapperFilter, (or consume twice) to allow us to have a true freq()
-    // but this would have a performance cost for likely little gain in the user experience, it
-    // would only serve to make this method less bogus.
-    // instead, we always return freq() = Integer.MAX_VALUE and let PH terminate based on offset...
-    
-    return new PostingsEnum() {
-      int currentDoc = -1;
-      int currentMatch = -1;
-      int currentStartOffset = -1;
-      int currentEndOffset = -1;
-      TokenStream stream = ts;
-      
-      final BytesRef matchDescriptions[] = new BytesRef[matchers.length];
-      
-      @Override
-      public int nextPosition() throws IOException {
-        if (stream != null) {
-          while (stream.incrementToken()) {
-            for (int i = 0; i < matchers.length; i++) {
-              if (matchers[i].run(charTermAtt.buffer(), 0, charTermAtt.length())) {
-                currentStartOffset = offsetAtt.startOffset();
-                currentEndOffset = offsetAtt.endOffset();
-                currentMatch = i;
-                return 0;
-              }
-            }
-          }
-          stream.end();
-          stream.close();
-          stream = null;
-        }
-        // exhausted
-        currentStartOffset = currentEndOffset = Integer.MAX_VALUE;
-        return Integer.MAX_VALUE;
-      }
-
-      @Override
-      public int freq() throws IOException {
-        return Integer.MAX_VALUE; // lie
-      }
-
-      @Override
-      public int startOffset() throws IOException {
-        assert currentStartOffset >= 0;
-        return currentStartOffset;
-      }
-
-      @Override
-      public int endOffset() throws IOException {
-        assert currentEndOffset >= 0;
-        return currentEndOffset;
-      }
-
-      @Override
-      public BytesRef getPayload() throws IOException {
-        if (matchDescriptions[currentMatch] == null) {
-          matchDescriptions[currentMatch] = new BytesRef(matchers[currentMatch].toString());
-        }
-        return matchDescriptions[currentMatch];
-      }
-
-      @Override
-      public int docID() {
-        return currentDoc;
-      }
-
-      @Override
-      public int nextDoc() throws IOException {
-        throw new UnsupportedOperationException();
-      }
-
-      @Override
-      public int advance(int target) throws IOException {
-        return currentDoc = target;
-      }
-
-      @Override
-      public long cost() {
-        return 0;
-      }
-    };
-  }
-}
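
The PrefixQuery branch of extractAutomata() above is straightforward to
reproduce with the core automaton utilities, which are unaffected by this
removal. A standalone sketch (not part of this commit):

    // "foo*" as an automaton: the literal prefix concatenated with any string,
    // mirroring the PrefixQuery case in extractAutomata() above
    Automaton a = Operations.concatenate(Automata.makeString("foo"),
                                         Automata.makeAnyString());
    CharacterRunAutomaton matcher = new CharacterRunAutomaton(a);
    boolean hit  = matcher.run("foobar"); // true
    boolean miss = matcher.run("barfoo"); // false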

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java
deleted file mode 100644
index 50aebea..0000000
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.postingshighlight;
-
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.InPlaceMergeSorter;
-import org.apache.lucene.util.RamUsageEstimator;
-
-/**
- * Represents a passage (typically a sentence of the document). 
- * <p>
- * A passage contains {@link #getNumMatches} highlights from the query,
- * and the offsets and query terms that correspond with each match.
- * @lucene.experimental
- */
-public final class Passage {
-  int startOffset = -1;
-  int endOffset = -1;
-  float score = 0.0f;
-
-  int matchStarts[] = new int[8];
-  int matchEnds[] = new int[8];
-  BytesRef matchTerms[] = new BytesRef[8];
-  int numMatches = 0;
-  
-  void addMatch(int startOffset, int endOffset, BytesRef term) {
-    assert startOffset >= this.startOffset && startOffset <= this.endOffset;
-    if (numMatches == matchStarts.length) {
-      int newLength = ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
-      int newMatchStarts[] = new int[newLength];
-      int newMatchEnds[] = new int[newLength];
-      BytesRef newMatchTerms[] = new BytesRef[newLength];
-      System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
-      System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
-      System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
-      matchStarts = newMatchStarts;
-      matchEnds = newMatchEnds;
-      matchTerms = newMatchTerms;
-    }
-    assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
-    matchStarts[numMatches] = startOffset;
-    matchEnds[numMatches] = endOffset;
-    matchTerms[numMatches] = term;
-    numMatches++;
-  }
-  
-  void sort() {
-    final int starts[] = matchStarts;
-    final int ends[] = matchEnds;
-    final BytesRef terms[] = matchTerms;
-    new InPlaceMergeSorter() {
-      @Override
-      protected void swap(int i, int j) {
-        int temp = starts[i];
-        starts[i] = starts[j];
-        starts[j] = temp;
-        
-        temp = ends[i];
-        ends[i] = ends[j];
-        ends[j] = temp;
-        
-        BytesRef tempTerm = terms[i];
-        terms[i] = terms[j];
-        terms[j] = tempTerm;
-      }
-
-      @Override
-      protected int compare(int i, int j) {
-        return Integer.compare(starts[i], starts[j]);
-      }
-
-    }.sort(0, numMatches);
-  }
-  
-  void reset() {
-    startOffset = endOffset = -1;
-    score = 0.0f;
-    numMatches = 0;
-  }
-
-  /**
-   * Start offset of this passage.
-   * @return start index (inclusive) of the passage in the 
-   *         original content: always &gt;= 0.
-   */
-  public int getStartOffset() {
-    return startOffset;
-  }
-
-  /**
-   * End offset of this passage.
-   * @return end index (exclusive) of the passage in the 
-   *         original content: always &gt;= {@link #getStartOffset()}
-   */
-  public int getEndOffset() {
-    return endOffset;
-  }
-
-  /**
-   * Passage's score.
-   */
-  public float getScore() {
-    return score;
-  }
-  
-  /**
-   * Number of term matches available in 
-   * {@link #getMatchStarts}, {@link #getMatchEnds}, 
-   * {@link #getMatchTerms}
-   */
-  public int getNumMatches() {
-    return numMatches;
-  }
-
-  /**
-   * Start offsets of the term matches, in increasing order.
-   * <p>
-   * Only {@link #getNumMatches} are valid. Note that these
-   * offsets are absolute (not relative to {@link #getStartOffset()}).
-   */
-  public int[] getMatchStarts() {
-    return matchStarts;
-  }
-
-  /**
-   * End offsets of the term matches, corresponding with {@link #getMatchStarts}. 
-   * <p>
-   * Only {@link #getNumMatches} are valid. Note that it's possible that an end offset 
-   * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the 
-   * Analyzer produced a term which spans a passage boundary.
-   */
-  public int[] getMatchEnds() {
-    return matchEnds;
-  }
-
-  /**
-   * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
-   * <p>
-   * Only {@link #getNumMatches()} are valid.
-   */
-  public BytesRef[] getMatchTerms() {
-    return matchTerms;
-  }
-}
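
To make the accessor contract above concrete (only the first getNumMatches()
entries of the parallel arrays are valid, and all offsets are absolute), a
custom PassageFormatter might walk a passage like this; sketch only:

    for (int i = 0; i < passage.getNumMatches(); i++) { // entries beyond this are stale
      int start = passage.getMatchStarts()[i];          // absolute offset into the content
      int end = passage.getMatchEnds()[i];              // may extend past getEndOffset()
      String term = passage.getMatchTerms()[i].utf8ToString();
      // highlight content[start, end) for this occurrence of term
    }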

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
deleted file mode 100644
index f1596c1..0000000
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.postingshighlight;
-
-/**
- * Creates a formatted snippet from the top passages.
- *
- * @lucene.experimental
- */
-public abstract class PassageFormatter {
-
-  /**
-   * Formats the top <code>passages</code> from <code>content</code>
-   * into a human-readable text snippet.
-   *
-   * @param passages top-N passages for the field. Note these are sorted in
-   *        the order that they appear in the document for convenience.
-   * @param content content for the field.
-   * @return formatted highlight.  Note that for the
-   * non-expert APIs in {@link PostingsHighlighter} that
-   * return String, the toString method on the Object
-   * returned by this method is used to compute the string.
-   */
-  public abstract Object format(Passage passages[], String content);
-
-}
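
The javadoc note about non-String results refers to the expert
highlightFieldsAsObjects API further down in this commit. A minimal sketch of
a formatter that defers rendering to the caller (illustration only):

    PassageFormatter passThrough = new PassageFormatter() {
      @Override
      public Object format(Passage passages[], String content) {
        return passages; // the String-returning APIs would call toString() on this
      }
    };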

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageScorer.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageScorer.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageScorer.java
deleted file mode 100644
index 1f74f7a..0000000
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageScorer.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.postingshighlight;
-
-/** 
- * Ranks passages found by {@link PostingsHighlighter}.
- * <p>
- * Each passage is scored as a miniature document within the document.
- * The final score is computed as {@link #norm} * &sum; ({@link #weight} * {@link #tf}).
- * The default implementation is {@link #norm} * BM25.
- * @lucene.experimental
- */
-public class PassageScorer {
-  
-  // TODO: this formula is completely made up. It might not provide relevant snippets!
-  
-  /** BM25 k1 parameter, controls term frequency normalization */
-  final float k1;
-  /** BM25 b parameter, controls length normalization. */
-  final float b;
-  /** A pivot used for length normalization. */
-  final float pivot;
-  
-  /**
-   * Creates PassageScorer with these default values:
-   * <ul>
-   *   <li>{@code k1 = 1.2},
-   *   <li>{@code b = 0.75}.
-   *   <li>{@code pivot = 87}
-   * </ul>
-   */
-  public PassageScorer() {
-    // 1.2 and 0.75 are well-known bm25 defaults (but maybe not the best here) ?
-    // 87 is typical average english sentence length.
-    this(1.2f, 0.75f, 87f);
-  }
-  
-  /**
-   * Creates PassageScorer with specified scoring parameters
-   * @param k1 Controls non-linear term frequency normalization (saturation).
-   * @param b Controls to what degree passage length normalizes tf values.
-   * @param pivot Pivot value for length normalization (some rough idea of average sentence length in characters).
-   */
-  public PassageScorer(float k1, float b, float pivot) {
-    this.k1 = k1;
-    this.b = b;
-    this.pivot = pivot;
-  }
-    
-  /**
-   * Computes term importance, given its in-document statistics.
-   * 
-   * @param contentLength length of document in characters
-   * @param totalTermFreq number of time term occurs in document
-   * @return term importance
-   */
-  public float weight(int contentLength, int totalTermFreq) {
-    // approximate #docs from content length
-    float numDocs = 1 + contentLength / pivot;
-    // numDocs not numDocs - docFreq (ala DFR), since we approximate numDocs
-    return (k1 + 1) * (float) Math.log(1 + (numDocs + 0.5D)/(totalTermFreq + 0.5D));
-  }
-
-  /**
-   * Computes term weight, given the frequency within the passage
-   * and the passage's length.
-   * 
-   * @param freq number of occurrences of the term within this passage
-   * @param passageLen length of the passage in characters.
-   * @return term weight
-   */
-  public float tf(int freq, int passageLen) {
-    float norm = k1 * ((1 - b) + b * (passageLen / pivot));
-    return freq / (freq + norm);
-  }
-    
-  /**
-   * Normalize a passage according to its position in the document.
-   * <p>
-   * Typically passages towards the beginning of the document are 
-   * more useful for summarizing the contents.
-   * <p>
-   * The default implementation is <code>1 + 1/log(pivot + passageStart)</code>
-   * @param passageStart start offset of the passage
-   * @return a boost value multiplied into the passage's score.
-   */
-  public float norm(int passageStart) {
-    return 1 + 1/(float)Math.log(pivot + passageStart);
-  }
-}
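
Plugging the defaults into the formula above (norm * sum(weight * tf)) for an
870-character document in which a query term occurs 3 times, with an
87-character passage at offset 0 containing 2 of those occurrences; values are
approximate:

    PassageScorer scorer = new PassageScorer(); // k1=1.2, b=0.75, pivot=87
    float w  = scorer.weight(870, 3); // numDocs = 1+870/87 = 11; 2.2*ln(1+11.5/3.5) ~ 3.20
    float tf = scorer.tf(2, 87);      // norm = 1.2*(0.25+0.75) = 1.2; 2/(2+1.2) = 0.625
    float n  = scorer.norm(0);        // 1 + 1/ln(87) ~ 1.22
    float score = n * (w * tf);       // ~ 2.45 for this passage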

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
deleted file mode 100644
index e4d3667..0000000
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
+++ /dev/null
@@ -1,820 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.postingshighlight;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.text.BreakIterator;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.PriorityQueue;
-import java.util.SortedSet;
-import java.util.TreeSet;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.MultiReader;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.StoredFieldVisitor;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.InPlaceMergeSorter;
-import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
-
-/**
- * Simple highlighter that does not analyze fields nor use
- * term vectors. Instead it requires 
- * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.
- * <p>
- * PostingsHighlighter treats the single original document as the whole corpus, and then scores individual
- * passages as if they were documents in this corpus. It uses a {@link BreakIterator} to find 
- * passages in the text; by default it breaks using {@link BreakIterator#getSentenceInstance(Locale) 
- * getSentenceInstance(Locale.ROOT)}. It then iterates in parallel (merge sorting by offset) through 
- * the positions of all terms from the query, coalescing those hits that occur in a single passage 
- * into a {@link Passage}, and then scores each Passage using a separate {@link PassageScorer}. 
- * Passages are finally formatted into highlighted snippets with a {@link PassageFormatter}.
- * <p>
- * You can customize the behavior by subclassing this highlighter, some important hooks:
- * <ul>
- *   <li>{@link #getBreakIterator(String)}: Customize how the text is divided into passages.
- *   <li>{@link #getScorer(String)}: Customize how passages are ranked.
- *   <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
- *   <li>{@link #getIndexAnalyzer(String)}: Enable highlighting of MultiTermQuerys such as {@code WildcardQuery}.
- * </ul>
- * <p>
- * <b>WARNING</b>: The code is very new and probably still has some exciting bugs!
- * <p>
- * Example usage:
- * <pre class="prettyprint">
- *   // configure field with offsets at index time
- *   FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- *   offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- *   Field body = new Field("body", "foobar", offsetsType);
- *
- *   // retrieve highlights at query time 
- *   PostingsHighlighter highlighter = new PostingsHighlighter();
- *   Query query = new TermQuery(new Term("body", "highlighting"));
- *   TopDocs topDocs = searcher.search(query, n);
- *   String highlights[] = highlighter.highlight("body", query, searcher, topDocs);
- * </pre>
- * <p>
- * This is thread-safe, and can be used across different readers.
- * @lucene.experimental
- */
-public class PostingsHighlighter {
-  
-  // TODO: maybe allow re-analysis for tiny fields? currently we require offsets,
-  // but if the analyzer is really fast and the field is tiny, this might really be
-  // unnecessary.
-  
-  /** for rewriting: we don't want slow processing from MTQs */
-  private static final IndexSearcher EMPTY_INDEXSEARCHER;
-  static {
-    try {
-      IndexReader emptyReader = new MultiReader();
-      EMPTY_INDEXSEARCHER = new IndexSearcher(emptyReader);
-      EMPTY_INDEXSEARCHER.setQueryCache(null);
-    } catch (IOException bogus) {
-      throw new RuntimeException(bogus);
-    }
-  }
-  
-  /** Default maximum content size to process. Typically snippets
-   *  closer to the beginning of the document better summarize its content */
-  public static final int DEFAULT_MAX_LENGTH = 10000;
-    
-  private final int maxLength;
-
-  /** Set the first time {@link #getFormatter} is called,
-   *  and then reused. */
-  private PassageFormatter defaultFormatter;
-
-  /** Set the first time {@link #getScorer} is called,
-   *  and then reused. */
-  private PassageScorer defaultScorer;
-  
-  /**
-   * Creates a new highlighter with {@link #DEFAULT_MAX_LENGTH}.
-   */
-  public PostingsHighlighter() {
-    this(DEFAULT_MAX_LENGTH);
-  }
-  
-  /**
-   * Creates a new highlighter, specifying maximum content length.
-   * @param maxLength maximum content size to process.
-   * @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
-   */
-  public PostingsHighlighter(int maxLength) {
-    if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
-      // two reasons: no overflow problems in BreakIterator.preceding(offset+1),
-      // our sentinel in the offsets queue uses this value to terminate.
-      throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
-    }
-    this.maxLength = maxLength;
-  }
-  
-  /** Returns the {@link BreakIterator} to use for
-   *  dividing text into passages.  This returns 
-   *  {@link BreakIterator#getSentenceInstance(Locale)} by default;
-   *  subclasses can override to customize. */
-  protected BreakIterator getBreakIterator(String field) {
-    return BreakIterator.getSentenceInstance(Locale.ROOT);
-  }
-
-  /** Returns the {@link PassageFormatter} to use for
-   *  formatting passages into highlighted snippets.  This
-   *  returns a new {@code PassageFormatter} by default;
-   *  subclasses can override to customize. */
-  protected PassageFormatter getFormatter(String field) {
-    if (defaultFormatter == null) {
-      defaultFormatter = new DefaultPassageFormatter();
-    }
-    return defaultFormatter;
-  }
-
-  /** Returns the {@link PassageScorer} to use for
-   *  ranking passages.  This
-   *  returns a new {@code PassageScorer} by default;
-   *  subclasses can override to customize. */
-  protected PassageScorer getScorer(String field) {
-    if (defaultScorer == null) {
-      defaultScorer = new PassageScorer();
-    }
-    return defaultScorer;
-  }
-
-  /**
-   * Highlights the top passages from a single field.
-   * 
-   * @param field field name to highlight. 
-   *        Must have a stored string value and also be indexed with offsets.
-   * @param query query to highlight.
-   * @param searcher searcher that was previously used to execute the query.
-   * @param topDocs TopDocs containing the summary result documents to highlight.
-   * @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>. 
-   *         If no highlights were found for a document, the
-   *         first sentence for the field will be returned.
-   * @throws IOException if an I/O error occurred during processing
-   * @throws IllegalArgumentException if <code>field</code> was indexed without 
-   *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
-   */
-  public String[] highlight(String field, Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException {
-    return highlight(field, query, searcher, topDocs, 1);
-  }
-  
-  /**
-   * Highlights the top-N passages from a single field.
-   * 
-   * @param field field name to highlight. 
-   *        Must have a stored string value and also be indexed with offsets.
-   * @param query query to highlight.
-   * @param searcher searcher that was previously used to execute the query.
-   * @param topDocs TopDocs containing the summary result documents to highlight.
-   * @param maxPassages The maximum number of top-N ranked passages used to 
-   *        form the highlighted snippets.
-   * @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>. 
-   *         If no highlights were found for a document, the
-   *         first {@code maxPassages} sentences from the
-   *         field will be returned.
-   * @throws IOException if an I/O error occurred during processing
-   * @throws IllegalArgumentException if <code>field</code> was indexed without 
-   *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
-   */
-  public String[] highlight(String field, Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException {
-    Map<String,String[]> res = highlightFields(new String[] { field }, query, searcher, topDocs, new int[] { maxPassages });
-    return res.get(field);
-  }
-  
-  /**
-   * Highlights the top passages from multiple fields.
-   * <p>
-   * Conceptually, this behaves as a more efficient form of:
-   * <pre class="prettyprint">
-   * Map m = new HashMap();
-   * for (String field : fields) {
-   *   m.put(field, highlight(field, query, searcher, topDocs));
-   * }
-   * return m;
-   * </pre>
-   * 
-   * @param fields field names to highlight. 
-   *        Must have a stored string value and also be indexed with offsets.
-   * @param query query to highlight.
-   * @param searcher searcher that was previously used to execute the query.
-   * @param topDocs TopDocs containing the summary result documents to highlight.
-   * @return Map keyed on field name, containing the array of formatted snippets 
-   *         corresponding to the documents in <code>topDocs</code>. 
-   *         If no highlights were found for a document, the
-   *         first sentence from the field will be returned.
-   * @throws IOException if an I/O error occurred during processing
-   * @throws IllegalArgumentException if <code>field</code> was indexed without 
-   *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
-   */
-  public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException {
-    int maxPassages[] = new int[fields.length];
-    Arrays.fill(maxPassages, 1);
-    return highlightFields(fields, query, searcher, topDocs, maxPassages);
-  }
-  
-  /**
-   * Highlights the top-N passages from multiple fields.
-   * <p>
-   * Conceptually, this behaves as a more efficient form of:
-   * <pre class="prettyprint">
-   * Map m = new HashMap();
-   * for (String field : fields) {
-   *   m.put(field, highlight(field, query, searcher, topDocs, maxPassages));
-   * }
-   * return m;
-   * </pre>
-   * 
-   * @param fields field names to highlight. 
-   *        Must have a stored string value and also be indexed with offsets.
-   * @param query query to highlight.
-   * @param searcher searcher that was previously used to execute the query.
-   * @param topDocs TopDocs containing the summary result documents to highlight.
-   * @param maxPassages The maximum number of top-N ranked passages per-field used to 
-   *        form the highlighted snippets.
-   * @return Map keyed on field name, containing the array of formatted snippets 
-   *         corresponding to the documents in <code>topDocs</code>. 
-   *         If no highlights were found for a document, the
-   *         first {@code maxPassages} sentences from the
-   *         field will be returned.
-   * @throws IOException if an I/O error occurred during processing
-   * @throws IllegalArgumentException if <code>field</code> was indexed without 
-   *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
-   */
-  public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages[]) throws IOException {
-    final ScoreDoc scoreDocs[] = topDocs.scoreDocs;
-    int docids[] = new int[scoreDocs.length];
-    for (int i = 0; i < docids.length; i++) {
-      docids[i] = scoreDocs[i].doc;
-    }
-
-    return highlightFields(fields, query, searcher, docids, maxPassages);
-  }
-
-  /**
-   * Highlights the top-N passages from multiple fields,
-   * for the provided int[] docids.
-   * 
-   * @param fieldsIn field names to highlight. 
-   *        Must have a stored string value and also be indexed with offsets.
-   * @param query query to highlight.
-   * @param searcher searcher that was previously used to execute the query.
-   * @param docidsIn containing the document IDs to highlight.
-   * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to 
-   *        form the highlighted snippets.
-   * @return Map keyed on field name, containing the array of formatted snippets 
-   *         corresponding to the documents in <code>docidsIn</code>. 
-   *         If no highlights were found for a document, the
-   *         first {@code maxPassages} sentences from the field will
-   *         be returned.
-   * @throws IOException if an I/O error occurred during processing
-   * @throws IllegalArgumentException if <code>field</code> was indexed without 
-   *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
-   */
-  public Map<String,String[]> highlightFields(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
-    Map<String,String[]> snippets = new HashMap<>();
-    for(Map.Entry<String,Object[]> ent : highlightFieldsAsObjects(fieldsIn, query, searcher, docidsIn, maxPassagesIn).entrySet()) {
-      Object[] snippetObjects = ent.getValue();
-      String[] snippetStrings = new String[snippetObjects.length];
-      snippets.put(ent.getKey(), snippetStrings);
-      for(int i=0;i<snippetObjects.length;i++) {
-        Object snippet = snippetObjects[i];
-        if (snippet != null) {
-          snippetStrings[i] = snippet.toString();
-        }
-      }
-    }
-
-    return snippets;
-  }
-
-  /**
-   * Expert: highlights the top-N passages from multiple fields,
-   * for the provided int[] docids, to custom Object as
-   * returned by the {@link PassageFormatter}.  Use
-   * this API to render to something other than String.
-   * 
-   * @param fieldsIn field names to highlight. 
-   *        Must have a stored string value and also be indexed with offsets.
-   * @param query query to highlight.
-   * @param searcher searcher that was previously used to execute the query.
-   * @param docidsIn containing the document IDs to highlight.
-   * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to 
-   *        form the highlighted snippets.
-   * @return Map keyed on field name, containing the array of formatted snippets 
-   *         corresponding to the documents in <code>docidsIn</code>. 
-   *         If no highlights were found for a document, the
-   *         first {@code maxPassages} sentences from the field will
-   *         be returned.
-   * @throws IOException if an I/O error occurred during processing
-   * @throws IllegalArgumentException if <code>field</code> was indexed without 
-   *         {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
-   */
-  protected Map<String,Object[]> highlightFieldsAsObjects(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
-    if (fieldsIn.length < 1) {
-      throw new IllegalArgumentException("fieldsIn must not be empty");
-    }
-    if (fieldsIn.length != maxPassagesIn.length) {
-      throw new IllegalArgumentException("invalid number of maxPassagesIn");
-    }
-    SortedSet<Term> queryTerms = new TreeSet<>();
-    EMPTY_INDEXSEARCHER.createNormalizedWeight(query, false).extractTerms(queryTerms);
-
-    IndexReaderContext readerContext = searcher.getIndexReader().getContext();
-    List<LeafReaderContext> leaves = readerContext.leaves();
-
-    // Make our own copies because we sort in-place:
-    int[] docids = new int[docidsIn.length];
-    System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
-    final String fields[] = new String[fieldsIn.length];
-    System.arraycopy(fieldsIn, 0, fields, 0, fieldsIn.length);
-    final int maxPassages[] = new int[maxPassagesIn.length];
-    System.arraycopy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.length);
-
-    // sort for sequential io
-    Arrays.sort(docids);
-    new InPlaceMergeSorter() {
-
-      @Override
-      protected void swap(int i, int j) {
-        String tmp = fields[i];
-        fields[i] = fields[j];
-        fields[j] = tmp;
-        int tmp2 = maxPassages[i];
-        maxPassages[i] = maxPassages[j];
-        maxPassages[j] = tmp2;
-      }
-
-      @Override
-      protected int compare(int i, int j) {
-        return fields[i].compareTo(fields[j]);
-      }
-      
-    }.sort(0, fields.length);
-    
-    // pull stored data:
-    String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
-    
-    Map<String,Object[]> highlights = new HashMap<>();
-    for (int i = 0; i < fields.length; i++) {
-      String field = fields[i];
-      int numPassages = maxPassages[i];
-      Term floor = new Term(field, "");
-      Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
-      SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
-      // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)
-
-      // Strip off the redundant field:
-      BytesRef terms[] = new BytesRef[fieldTerms.size()];
-      int termUpto = 0;
-      for(Term term : fieldTerms) {
-        terms[termUpto++] = term.bytes();
-      }
-      Map<Integer,Object> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages, query);
-        
-      Object[] result = new Object[docids.length];
-      for (int j = 0; j < docidsIn.length; j++) {
-        result[j] = fieldHighlights.get(docidsIn[j]);
-      }
-      highlights.put(field, result);
-    }
-    return highlights;
-  }
-
-  /** Loads the String values for each field X docID to be
-   *  highlighted.  By default this loads from stored
-   *  fields, but a subclass can change the source.  This
-   *  method should allocate the String[fields.length][docids.length]
-   *  and fill all values.  The returned Strings must be
-   *  identical to what was indexed. */
-  protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
-    String contents[][] = new String[fields.length][docids.length];
-    char valueSeparators[] = new char[fields.length];
-    for (int i = 0; i < fields.length; i++) {
-      valueSeparators[i] = getMultiValuedSeparator(fields[i]);
-    }
-    LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, valueSeparators, maxLength);
-    for (int i = 0; i < docids.length; i++) {
-      searcher.doc(docids[i], visitor);
-      for (int j = 0; j < fields.length; j++) {
-        contents[j][i] = visitor.getValue(j).toString();
-      }
-      visitor.reset();
-    }
-    return contents;
-  }
-  
-  /** 
-   * Returns the logical separator between values for multi-valued fields.
-   * The default value is a space character, which means passages can span across values,
-   * but a subclass can override, for example with {@code U+2029 PARAGRAPH SEPARATOR (PS)}
-   * if each value holds a discrete passage for highlighting.
-   */
-  protected char getMultiValuedSeparator(String field) {
-    return ' ';
-  }
-  
-  /** 
-   * Returns the analyzer originally used to index the content for {@code field}.
-   * <p>
-   * This is used to highlight some MultiTermQueries.
-   * @return Analyzer or null (the default, meaning no special multi-term processing)
-   */
-  protected Analyzer getIndexAnalyzer(String field) {
-    return null;
-  }
-    
-  private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<LeafReaderContext> leaves, int maxPassages, Query query) throws IOException {  
-    Map<Integer,Object> highlights = new HashMap<>();
-
-    PassageFormatter fieldFormatter = getFormatter(field);
-    if (fieldFormatter == null) {
-      throw new NullPointerException("PassageFormatter must not be null");
-    }
-    
-    // check if we should do any multiterm processing
-    Analyzer analyzer = getIndexAnalyzer(field);
-    CharacterRunAutomaton automata[] = new CharacterRunAutomaton[0];
-    if (analyzer != null) {
-      automata = MultiTermHighlighting.extractAutomata(query, field);
-    }
-    
-    // resize 'terms', where the last term is the multiterm matcher
-    if (automata.length > 0) {
-      BytesRef newTerms[] = new BytesRef[terms.length + 1];
-      System.arraycopy(terms, 0, newTerms, 0, terms.length);
-      terms = newTerms;
-    }
-
-    // we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes
-    // otherwise, we will just advance() existing enums to the new document in the same segment.
-    PostingsEnum postings[] = null;
-    TermsEnum termsEnum = null;
-    int lastLeaf = -1;
-    
-    for (int i = 0; i < docids.length; i++) {
-      String content = contents[i];
-      if (content.length() == 0) {
-        continue; // nothing to do
-      }
-      bi.setText(content);
-      int doc = docids[i];
-      int leaf = ReaderUtil.subIndex(doc, leaves);
-      LeafReaderContext subContext = leaves.get(leaf);
-      LeafReader r = subContext.reader();
-      
-      assert leaf >= lastLeaf; // increasing order
-      
-      // if the segment has changed, we must initialize new enums.
-      if (leaf != lastLeaf) {
-        Terms t = r.terms(field);
-        if (t != null) {
-          if (!t.hasOffsets()) {
-            // no offsets available
-            throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
-          }
-          termsEnum = t.iterator();
-          postings = new PostingsEnum[terms.length];
-        } else {
-          termsEnum = null;
-        }
-      }
-      if (termsEnum == null) {
-        continue; // no terms for this field, nothing to do
-      }
-      
-      // if there are multi-term matches, we have to initialize the "fake" enum for each document
-      if (automata.length > 0) {
-        PostingsEnum dp = MultiTermHighlighting.getDocsEnum(analyzer.tokenStream(field, content), automata);
-        dp.advance(doc - subContext.docBase);
-        postings[terms.length-1] = dp; // last term is the multiterm matcher
-      }
-      
-      Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
-      
-      if (passages.length == 0) {
-        // no passages were returned, so ask for a default summary
-        passages = getEmptyHighlight(field, bi, maxPassages);
-      }
-
-      if (passages.length > 0) {
-        highlights.put(doc, fieldFormatter.format(passages, content));
-      }
-      
-      lastLeaf = leaf;
-    }
-    
-    return highlights;
-  }
-  
-  // algorithm: treat sentence snippets as miniature documents
-  // we can intersect these with the postings lists via BreakIterator.preceding(offset), and
-  // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
-  private Passage[] highlightDoc(String field, BytesRef terms[], int contentLength, BreakIterator bi, int doc, 
-      TermsEnum termsEnum, PostingsEnum[] postings, int n) throws IOException {
-    PassageScorer scorer = getScorer(field);
-    if (scorer == null) {
-      throw new NullPointerException("PassageScorer must not be null");
-    }
-    PriorityQueue<OffsetsEnum> pq = new PriorityQueue<>();
-    float weights[] = new float[terms.length];
-    // initialize postings
-    for (int i = 0; i < terms.length; i++) {
-      PostingsEnum de = postings[i];
-      int pDoc;
-      if (de == EMPTY) {
-        continue;
-      } else if (de == null) {
-        postings[i] = EMPTY; // initially
-        if (!termsEnum.seekExact(terms[i])) {
-          continue; // term not found
-        }
-        de = postings[i] = termsEnum.postings(null, PostingsEnum.OFFSETS);
-        assert de != null;
-        pDoc = de.advance(doc);
-      } else {
-        pDoc = de.docID();
-        if (pDoc < doc) {
-          pDoc = de.advance(doc);
-        }
-      }
-
-      if (doc == pDoc) {
-        weights[i] = scorer.weight(contentLength, de.freq());
-        de.nextPosition();
-        pq.add(new OffsetsEnum(de, i));
-      }
-    }
-    
-    pq.add(new OffsetsEnum(EMPTY, Integer.MAX_VALUE)); // a sentinel for termination
-    
-    PriorityQueue<Passage> passageQueue = new PriorityQueue<>(n, new Comparator<Passage>() {
-      @Override
-      public int compare(Passage left, Passage right) {
-        if (left.score < right.score) {
-          return -1;
-        } else if (left.score > right.score) {
-          return 1;
-        } else {
-          return left.startOffset - right.startOffset;
-        }
-      }
-    });
-    Passage current = new Passage();
-    
-    OffsetsEnum off;
-    while ((off = pq.poll()) != null) {
-      final PostingsEnum dp = off.dp;
-      int start = dp.startOffset();
-      assert start >= 0;
-      int end = dp.endOffset();
-      // LUCENE-5166: this hit would span the content limit... however more valid
-      // hits may exist (they are sorted by start). So we pretend we never saw this
-      // term; it won't cause a passage to be added to passageQueue or anything.
-      assert EMPTY.startOffset() == Integer.MAX_VALUE;
-      if (start < contentLength && end > contentLength) {
-        continue;
-      }
-      if (start >= current.endOffset) {
-        if (current.startOffset >= 0) {
-          // finalize current
-          current.score *= scorer.norm(current.startOffset);
-          // new sentence: first add 'current' to queue 
-          if (passageQueue.size() == n && current.score < passageQueue.peek().score) {
-            current.reset(); // can't compete, just reset it
-          } else {
-            passageQueue.offer(current);
-            if (passageQueue.size() > n) {
-              current = passageQueue.poll();
-              current.reset();
-            } else {
-              current = new Passage();
-            }
-          }
-        }
-        // if we exceed limit, we are done
-        if (start >= contentLength) {
-          Passage passages[] = new Passage[passageQueue.size()];
-          passageQueue.toArray(passages);
-          for (Passage p : passages) {
-            p.sort();
-          }
-          // sort in ascending order
-          Arrays.sort(passages, new Comparator<Passage>() {
-            @Override
-            public int compare(Passage left, Passage right) {
-              return left.startOffset - right.startOffset;
-            }
-          });
-          return passages;
-        }
-        // advance breakiterator
-        assert BreakIterator.DONE < 0;
-        current.startOffset = Math.max(bi.preceding(start+1), 0);
-        current.endOffset = Math.min(bi.next(), contentLength);
-      }
-      int tf = 0;
-      while (true) {
-        tf++;
-        BytesRef term = terms[off.id];
-        if (term == null) {
-          // multitermquery match, pull from payload
-          term = off.dp.getPayload();
-          assert term != null;
-        }
-        current.addMatch(start, end, term);
-        if (off.pos == dp.freq()) {
-          break; // removed from pq
-        } else {
-          off.pos++;
-          dp.nextPosition();
-          start = dp.startOffset();
-          end = dp.endOffset();
-        }
-        if (start >= current.endOffset || end > contentLength) {
-          pq.offer(off);
-          break;
-        }
-      }
-      current.score += weights[off.id] * scorer.tf(tf, current.endOffset - current.startOffset);
-    }
-
-    // Dead code but compiler disagrees:
-    assert false;
-    return null;
-  }
-
-  /** Called to summarize a document when no hits were
-   *  found.  By default this just returns the first
-   *  {@code maxPassages} sentences; subclasses can override
-   *  to customize. */
-  protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
-    // BreakIterator should be un-next'd:
-    List<Passage> passages = new ArrayList<>();
-    int pos = bi.current();
-    assert pos == 0;
-    while (passages.size() < maxPassages) {
-      int next = bi.next();
-      if (next == BreakIterator.DONE) {
-        break;
-      }
-      Passage passage = new Passage();
-      passage.score = Float.NaN;
-      passage.startOffset = pos;
-      passage.endOffset = next;
-      passages.add(passage);
-      pos = next;
-    }
-
-    return passages.toArray(new Passage[passages.size()]);
-  }
-  
-  private static class OffsetsEnum implements Comparable<OffsetsEnum> {
-    PostingsEnum dp;
-    int pos;
-    int id;
-    
-    OffsetsEnum(PostingsEnum dp, int id) throws IOException {
-      this.dp = dp;
-      this.id = id;
-      this.pos = 1;
-    }
-
-    @Override
-    public int compareTo(OffsetsEnum other) {
-      try {
-        int off = dp.startOffset();
-        int otherOff = other.dp.startOffset();
-        if (off == otherOff) {
-          return id - other.id;
-        } else {
-          return Integer.compare(off, otherOff);
-        }
-      } catch (IOException e) {
-        throw new RuntimeException(e);
-      }
-    }
-  }
-  
-  private static final PostingsEnum EMPTY = new PostingsEnum() {
-
-    @Override
-    public int nextPosition() throws IOException { return -1; }
-
-    @Override
-    public int startOffset() throws IOException { return Integer.MAX_VALUE; }
-
-    @Override
-    public int endOffset() throws IOException { return Integer.MAX_VALUE; }
-
-    @Override
-    public BytesRef getPayload() throws IOException { return null; }
-
-    @Override
-    public int freq() throws IOException { return 0; }
-
-    @Override
-    public int docID() { return NO_MORE_DOCS; }
-
-    @Override
-    public int nextDoc() throws IOException { return NO_MORE_DOCS; }
-
-    @Override
-    public int advance(int target) throws IOException { return NO_MORE_DOCS; }
-    
-    @Override
-    public long cost() { return 0; }
-  };
-  
-  private static class LimitedStoredFieldVisitor extends StoredFieldVisitor {
-    private final String fields[];
-    private final char valueSeparators[];
-    private final int maxLength;
-    private final StringBuilder builders[];
-    private int currentField = -1;
-    
-    public LimitedStoredFieldVisitor(String fields[], char valueSeparators[], int maxLength) {
-      assert fields.length == valueSeparators.length;
-      this.fields = fields;
-      this.valueSeparators = valueSeparators;
-      this.maxLength = maxLength;
-      builders = new StringBuilder[fields.length];
-      for (int i = 0; i < builders.length; i++) {
-        builders[i] = new StringBuilder();
-      }
-    }
-    
-    @Override
-    public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
-      String value = new String(bytes, StandardCharsets.UTF_8);
-      assert currentField >= 0;
-      StringBuilder builder = builders[currentField];
-      if (builder.length() > 0 && builder.length() < maxLength) {
-        builder.append(valueSeparators[currentField]);
-      }
-      if (builder.length() + value.length() > maxLength) {
-        builder.append(value, 0, maxLength - builder.length());
-      } else {
-        builder.append(value);
-      }
-    }
-
-    @Override
-    public Status needsField(FieldInfo fieldInfo) throws IOException {
-      currentField = Arrays.binarySearch(fields, fieldInfo.name);
-      if (currentField < 0) {
-        return Status.NO;
-      } else if (builders[currentField].length() > maxLength) {
-        return fields.length == 1 ? Status.STOP : Status.NO;
-      }
-      return Status.YES;
-    }
-    
-    String getValue(int i) {
-      return builders[i].toString();
-    }
-    
-    void reset() {
-      currentField = -1;
-      for (int i = 0; i < fields.length; i++) {
-        builders[i].setLength(0);
-      }
-    }
-  }
-}

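Note on the removed scorer: the algorithm comment in highlightDoc above
("score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))")
composes the three PassageScorer calls visible in the deleted code. A minimal
sketch of that composition follows; the method and parameter names are
illustrative only and are not part of this commit:

    // Sketch only: how highlightDoc combined PassageScorer.weight/tf/norm per passage.
    // termFreqsInDoc / termFreqsInPassage are illustrative inputs, not real API.
    static float scorePassage(PassageScorer scorer, int contentLength,
                              int passageStart, int passageLength,
                              int[] termFreqsInDoc, int[] termFreqsInPassage) {
      float score = 0f;
      for (int i = 0; i < termFreqsInPassage.length; i++) {
        float weight = scorer.weight(contentLength, termFreqsInDoc[i]);      // per-term document weight
        score += weight * scorer.tf(termFreqsInPassage[i], passageLength);   // in-passage term frequency
      }
      return score * scorer.norm(passageStart); // favor passages nearer the start of the content
    }
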
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/package-info.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/package-info.java
deleted file mode 100644
index 10013c2..0000000
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Highlighter implementation that uses offsets from postings lists.
- */
-package org.apache.lucene.search.postingshighlight;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/CustomSeparatorBreakIterator.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/CustomSeparatorBreakIterator.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/CustomSeparatorBreakIterator.java
new file mode 100644
index 0000000..7119aed
--- /dev/null
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/CustomSeparatorBreakIterator.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.uhighlight;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+/**
+ * A {@link BreakIterator} that breaks the text whenever a certain separator, provided as a constructor argument, is found.
+ */
+public final class CustomSeparatorBreakIterator extends BreakIterator {
+
+  private final char separator;
+  private CharacterIterator text;
+  private int current;
+
+  public CustomSeparatorBreakIterator(char separator) {
+    this.separator = separator;
+  }
+
+  @Override
+  public int current() {
+    return current;
+  }
+
+  @Override
+  public int first() {
+    text.setIndex(text.getBeginIndex());
+    return current = text.getIndex();
+  }
+
+  @Override
+  public int last() {
+    text.setIndex(text.getEndIndex());
+    return current = text.getIndex();
+  }
+
+  @Override
+  public int next() {
+    if (text.getIndex() == text.getEndIndex()) {
+      return DONE;
+    } else {
+      return advanceForward();
+    }
+  }
+
+  private int advanceForward() {
+    char c;
+    while ((c = text.next()) != CharacterIterator.DONE) {
+      if (c == separator) {
+        return current = text.getIndex() + 1;
+      }
+    }
+    assert text.getIndex() == text.getEndIndex();
+    return current = text.getIndex();
+  }
+
+  @Override
+  public int following(int pos) {
+    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
+      throw new IllegalArgumentException("offset out of bounds");
+    } else if (pos == text.getEndIndex()) {
+      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+      // https://bugs.openjdk.java.net/browse/JDK-8015110
+      text.setIndex(text.getEndIndex());
+      current = text.getIndex();
+      return DONE;
+    } else {
+      text.setIndex(pos);
+      current = text.getIndex();
+      return advanceForward();
+    }
+  }
+
+  @Override
+  public int previous() {
+    if (text.getIndex() == text.getBeginIndex()) {
+      return DONE;
+    } else {
+      return advanceBackward();
+    }
+  }
+
+  private int advanceBackward() {
+    char c;
+    while ((c = text.previous()) != CharacterIterator.DONE) {
+      if (c == separator) {
+        return current = text.getIndex() + 1;
+      }
+    }
+    assert text.getIndex() == text.getBeginIndex();
+    return current = text.getIndex();
+  }
+
+  @Override
+  public int preceding(int pos) {
+    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
+      throw new IllegalArgumentException("offset out of bounds");
+    } else if (pos == text.getBeginIndex()) {
+      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+      // https://bugs.openjdk.java.net/browse/JDK-8015110
+      text.setIndex(text.getBeginIndex());
+      current = text.getIndex();
+      return DONE;
+    } else {
+      text.setIndex(pos);
+      current = text.getIndex();
+      return advanceBackward();
+    }
+  }
+
+  @Override
+  public int next(int n) {
+    if (n < 0) {
+      for (int i = 0; i < -n; i++) {
+        previous();
+      }
+    } else {
+      for (int i = 0; i < n; i++) {
+        next();
+      }
+    }
+    return current();
+  }
+
+  @Override
+  public CharacterIterator getText() {
+    return text;
+  }
+
+  @Override
+  public void setText(CharacterIterator newText) {
+    text = newText;
+    current = text.getBeginIndex();
+  }
+}

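The new CustomSeparatorBreakIterator can be exercised standalone with the
standard BreakIterator iteration idiom; a minimal sketch (the demo class name
and sample text are illustrative only):

    import java.text.BreakIterator;
    import java.text.StringCharacterIterator;
    import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;

    public class CustomSeparatorDemo {
      public static void main(String[] args) {
        String text = "first value\u2029second value\u2029third value";
        BreakIterator bi = new CustomSeparatorBreakIterator('\u2029');
        bi.setText(new StringCharacterIterator(text));
        int start = bi.first();
        for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) {
          // prints each value (with its trailing separator, when present)
          System.out.println(text.substring(start, end));
        }
      }
    }
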
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/WholeBreakIterator.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/WholeBreakIterator.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/WholeBreakIterator.java
new file mode 100644
index 0000000..37f48aa
--- /dev/null
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/WholeBreakIterator.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.uhighlight;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+/** Just produces one single fragment for the entire text */
+public final class WholeBreakIterator extends BreakIterator {
+  private CharacterIterator text;
+  private int start;
+  private int end;
+  private int current;
+
+  @Override
+  public int current() {
+    return current;
+  }
+
+  @Override
+  public int first() {
+    return (current = start);
+  }
+
+  @Override
+  public int following(int pos) {
+    if (pos < start || pos > end) {
+      throw new IllegalArgumentException("offset out of bounds");
+    } else if (pos == end) {
+      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+      // https://bugs.openjdk.java.net/browse/JDK-8015110
+      current = end;
+      return DONE;
+    } else {
+      return last();
+    }
+  }
+
+  @Override
+  public CharacterIterator getText() {
+    return text;
+  }
+
+  @Override
+  public int last() {
+    return (current = end);
+  }
+
+  @Override
+  public int next() {
+    if (current == end) {
+      return DONE;
+    } else {
+      return last();
+    }
+  }
+
+  @Override
+  public int next(int n) {
+    if (n < 0) {
+      for (int i = 0; i < -n; i++) {
+        previous();
+      }
+    } else {
+      for (int i = 0; i < n; i++) {
+        next();
+      }
+    }
+    return current();
+  }
+
+  @Override
+  public int preceding(int pos) {
+    if (pos < start || pos > end) {
+      throw new IllegalArgumentException("offset out of bounds");
+    } else if (pos == start) {
+      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+      // https://bugs.openjdk.java.net/browse/JDK-8015110
+      current = start;
+      return DONE;
+    } else {
+      return first();
+    }
+  }
+
+  @Override
+  public int previous() {
+    if (current == start) {
+      return DONE;
+    } else {
+      return first();
+    }
+  }
+
+  @Override
+  public void setText(CharacterIterator newText) {
+    start = newText.getBeginIndex();
+    end = newText.getEndIndex();
+    text = newText;
+    current = start;
+  }
+}
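
Both break iterators now live in the UnifiedHighlighter's package. A minimal
sketch of wiring WholeBreakIterator into a UnifiedHighlighter, assuming its
protected getBreakIterator(String) extension point; the helper method name is
illustrative only:

    import java.text.BreakIterator;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
    import org.apache.lucene.search.uhighlight.WholeBreakIterator;

    static UnifiedHighlighter wholeFieldHighlighter(IndexSearcher searcher, Analyzer analyzer) {
      return new UnifiedHighlighter(searcher, analyzer) {
        @Override
        protected BreakIterator getBreakIterator(String field) {
          // One passage spanning the entire field value, instead of sentence snippets.
          return new WholeBreakIterator();
        }
      };
    }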