You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2016/11/17 16:25:44 UTC
[1/2] lucene-solr:branch_6x: LUCENE-7559: UnifiedHighlighter:
Increase Passage visibility
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x e525cca01 -> 06ee34c68
LUCENE-7559: UnifiedHighlighter: Increase Passage visibility
(cherry picked from commit c51e890)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/23b8bb66
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/23b8bb66
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/23b8bb66
Branch: refs/heads/branch_6x
Commit: 23b8bb669359008d19fd19d744ba42ef8870d25c
Parents: e525cca
Author: David Smiley <ds...@apache.org>
Authored: Thu Nov 17 11:19:54 2016 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Thu Nov 17 11:25:12 2016 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 ++
.../uhighlight/AnalysisOffsetStrategy.java | 3 ++
.../uhighlight/DefaultPassageFormatter.java | 14 ++++----
.../search/uhighlight/FieldHighlighter.java | 30 ++++++++--------
.../lucene/search/uhighlight/OffsetsEnum.java | 8 ++---
.../lucene/search/uhighlight/Passage.java | 38 ++++++++++++++------
.../uhighlight/TokenStreamOffsetStrategy.java | 7 ++--
.../uhighlight/TestUnifiedHighlighterMTQ.java | 14 ++++----
8 files changed, 70 insertions(+), 47 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index c252da8..6039562 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,9 @@ Other
* LUCENE-7534: fix smokeTestRelease.py to run on Cygwin (Mikhail Khludnev)
+* LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to
+ be customized. (David Smiley)
+
Build
* LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
index e9db77c..162d270 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
@@ -75,6 +75,9 @@ public abstract class AnalysisOffsetStrategy extends FieldOffsetStrategy {
*
* @lucene.internal
*/
+ // TODO we could make this go away. MemoryIndexOffsetStrategy could simply split and analyze each value into the
+ // MemoryIndex. TokenStreamOffsetStrategy's hack TokenStreamPostingsEnum could incorporate this logic,
+ // albeit with less code, less hack.
private static final class MultiValueTokenStream extends TokenFilter {
private final String fieldName;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
index 77612a7..bc27a43 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
@@ -63,13 +63,13 @@ public class DefaultPassageFormatter extends PassageFormatter {
int pos = 0;
for (Passage passage : passages) {
// don't add ellipsis if its the first one, or if its connected.
- if (passage.startOffset > pos && pos > 0) {
+ if (passage.getStartOffset() > pos && pos > 0) {
sb.append(ellipsis);
}
- pos = passage.startOffset;
- for (int i = 0; i < passage.numMatches; i++) {
- int start = passage.matchStarts[i];
- int end = passage.matchEnds[i];
+ pos = passage.getStartOffset();
+ for (int i = 0; i < passage.getNumMatches(); i++) {
+ int start = passage.getMatchStarts()[i];
+ int end = passage.getMatchEnds()[i];
// its possible to have overlapping terms
if (start > pos) {
append(sb, content, pos, start);
@@ -82,8 +82,8 @@ public class DefaultPassageFormatter extends PassageFormatter {
}
}
// its possible a "term" from the analyzer could span a sentence boundary.
- append(sb, content, pos, Math.max(pos, passage.endOffset));
- pos = passage.endOffset;
+ append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
+ pos = passage.getEndOffset();
}
return sb.toString();
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
index 7c8f048..1caa739 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
@@ -117,9 +117,9 @@ public class FieldHighlighter {
break;
}
Passage passage = new Passage();
- passage.score = Float.NaN;
- passage.startOffset = pos;
- passage.endOffset = next;
+ passage.setScore(Float.NaN);
+ passage.setStartOffset(pos);
+ passage.setEndOffset(next);
passages.add(passage);
pos = next;
}
@@ -145,12 +145,12 @@ public class FieldHighlighter {
offsetsEnumQueue.add(new OffsetsEnum(null, EMPTY)); // a sentinel for termination
PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
- if (left.score < right.score) {
+ if (left.getScore() < right.getScore()) {
return -1;
- } else if (left.score > right.score) {
+ } else if (left.getScore() > right.getScore()) {
return 1;
} else {
- return left.startOffset - right.startOffset;
+ return left.getStartOffset() - right.getStartOffset();
}
});
Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue.
@@ -170,12 +170,12 @@ public class FieldHighlighter {
continue;
}
// See if this term should be part of a new passage.
- if (start >= passage.endOffset) {
- if (passage.startOffset >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
+ if (start >= passage.getEndOffset()) {
+ if (passage.getStartOffset() >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
// finalize passage
- passage.score *= scorer.norm(passage.startOffset);
+ passage.setScore(passage.getScore() * scorer.norm(passage.getStartOffset()));
// new sentence: first add 'passage' to queue
- if (passageQueue.size() == maxPassages && passage.score < passageQueue.peek().score) {
+ if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) {
passage.reset(); // can't compete, just reset it
} else {
passageQueue.offer(passage);
@@ -192,8 +192,8 @@ public class FieldHighlighter {
break;
}
// advance breakIterator
- passage.startOffset = Math.max(breakIterator.preceding(start + 1), 0);
- passage.endOffset = Math.min(breakIterator.following(start), contentLength);
+ passage.setStartOffset(Math.max(breakIterator.preceding(start + 1), 0));
+ passage.setEndOffset(Math.min(breakIterator.following(start), contentLength));
}
// Add this term to the passage.
int tf = 0;
@@ -209,12 +209,12 @@ public class FieldHighlighter {
off.nextPosition();
start = off.startOffset();
end = off.endOffset();
- if (start >= passage.endOffset || end > contentLength) { // it's beyond this passage
+ if (start >= passage.getEndOffset() || end > contentLength) { // it's beyond this passage
offsetsEnumQueue.offer(off);
break;
}
}
- passage.score += off.weight * scorer.tf(tf, passage.endOffset - passage.startOffset);
+ passage.setScore(passage.getScore() + off.weight * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
}
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
@@ -222,7 +222,7 @@ public class FieldHighlighter {
p.sort();
}
// sort in ascending order
- Arrays.sort(passages, (left, right) -> left.startOffset - right.startOffset);
+ Arrays.sort(passages, (left, right) -> left.getStartOffset() - right.getStartOffset());
return passages;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
index cbaeb90..db1ea1f 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@@ -66,9 +66,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
}
BytesRef getTerm() throws IOException {
- // the dp.getPayload thing is a hack -- see MultiTermHighlighting
- return term != null ? term : postingsEnum.getPayload();
- // We don't deepcopy() because in this hack we know we don't have to.
+ // TODO TokenStreamOffsetStrategy could override OffsetsEnum; then remove this hack here
+ return term != null ? term : postingsEnum.getPayload(); // abusing payload like this is a total hack!
}
boolean hasMorePositions() throws IOException {
@@ -91,7 +90,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
@Override
public void close() throws IOException {
- if (postingsEnum instanceof Closeable) { // the one in MultiTermHighlighting is.
+ // TODO TokenStreamOffsetStrategy could override OffsetsEnum; then this base impl would be no-op.
+ if (postingsEnum instanceof Closeable) {
((Closeable) postingsEnum).close();
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
index de37d5d..a131d86 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
@@ -30,16 +30,17 @@ import org.apache.lucene.util.RamUsageEstimator;
*
* @lucene.experimental
*/
-public final class Passage {
- int startOffset = -1;
- int endOffset = -1;
- float score = 0.0f;
+public class Passage {
+ private int startOffset = -1;
+ private int endOffset = -1;
+ private float score = 0.0f;
- int matchStarts[] = new int[8];
- int matchEnds[] = new int[8];
- BytesRef matchTerms[] = new BytesRef[8];
- int numMatches = 0;
+ private int[] matchStarts = new int[8];
+ private int[] matchEnds = new int[8];
+ private BytesRef[] matchTerms = new BytesRef[8];
+ private int numMatches = 0;
+ /** @lucene.internal */
public void addMatch(int startOffset, int endOffset, BytesRef term) {
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
if (numMatches == matchStarts.length) {
@@ -61,7 +62,8 @@ public final class Passage {
numMatches++;
}
- void sort() {
+ /** @lucene.internal */
+ public void sort() {
final int starts[] = matchStarts;
final int ends[] = matchEnds;
final BytesRef terms[] = matchTerms;
@@ -89,7 +91,8 @@ public final class Passage {
}.sort(0, numMatches);
}
- void reset() {
+ /** @lucene.internal */
+ public void reset() {
startOffset = endOffset = -1;
score = 0.0f;
numMatches = 0;
@@ -158,4 +161,19 @@ public final class Passage {
public BytesRef[] getMatchTerms() {
return matchTerms;
}
+
+ /** @lucene.internal */
+ public void setStartOffset(int startOffset) {
+ this.startOffset = startOffset;
+ }
+
+ /** @lucene.internal */
+ public void setEndOffset(int endOffset) {
+ this.endOffset = endOffset;
+ }
+
+ /** @lucene.internal */
+ public void setScore(float score) {
+ this.score = score;
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
index 966eeef..28eb6b1 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
@@ -69,10 +69,8 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
return Collections.singletonList(new OffsetsEnum(null, mtqPostingsEnum));
}
- // but this would have a performance cost for likely little gain in the user experience, it
- // would only serve to make this method less bogus.
- // instead, we always return freq() = Integer.MAX_VALUE and let the highlighter terminate based on offset...
- // TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl?
+ // See class javadocs.
+ // TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl? See TODOs in OffsetsEnum.
private static class TokenStreamPostingsEnum extends PostingsEnum implements Closeable {
TokenStream stream; // becomes null when closed
final CharacterRunAutomaton[] matchers;
@@ -134,6 +132,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
return currentEndOffset;
}
+ // TOTAL HACK; used in OffsetsEnum.getTerm()
@Override
public BytesRef getPayload() throws IOException {
if (matchDescriptions[currentMatch] == null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
index 23eefdf..5435b11 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -697,13 +697,13 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
int pos = 0;
for (Passage passage : passages) {
// don't add ellipsis if its the first one, or if its connected.
- if (passage.startOffset > pos && pos > 0) {
+ if (passage.getStartOffset() > pos && pos > 0) {
sb.append("... ");
}
- pos = passage.startOffset;
- for (int i = 0; i < passage.numMatches; i++) {
- int start = passage.matchStarts[i];
- int end = passage.matchEnds[i];
+ pos = passage.getStartOffset();
+ for (int i = 0; i < passage.getNumMatches(); i++) {
+ int start = passage.getMatchStarts()[i];
+ int end = passage.getMatchEnds()[i];
// its possible to have overlapping terms
if (start > pos) {
sb.append(content, pos, start);
@@ -719,8 +719,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
}
}
// its possible a "term" from the analyzer could span a sentence boundary.
- sb.append(content, pos, Math.max(pos, passage.endOffset));
- pos = passage.endOffset;
+ sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
+ pos = passage.getEndOffset();
}
return sb.toString();
}
[2/2] lucene-solr:branch_6x: LUCENE-7559: fix indentation of entire
file
Posted by ds...@apache.org.
LUCENE-7559: fix indentation of entire file
(cherry picked from commit cbc8392)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/06ee34c6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/06ee34c6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/06ee34c6
Branch: refs/heads/branch_6x
Commit: 06ee34c68225fd904b8909af44173574890d881e
Parents: 23b8bb6
Author: David Smiley <ds...@apache.org>
Authored: Thu Nov 17 11:23:40 2016 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Thu Nov 17 11:25:36 2016 -0500
----------------------------------------------------------------------
.../uhighlight/DefaultPassageFormatter.java | 204 ++++++-------
.../lucene/search/uhighlight/Passage.java | 292 ++++++++++---------
2 files changed, 250 insertions(+), 246 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/06ee34c6/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
index bc27a43..62d58df 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
@@ -24,115 +24,117 @@ package org.apache.lucene.search.uhighlight;
* ellipses between unconnected passages.
*/
public class DefaultPassageFormatter extends PassageFormatter {
- /** text that will appear before highlighted terms */
- protected final String preTag;
- /** text that will appear after highlighted terms */
- protected final String postTag;
- /** text that will appear between two unconnected passages */
- protected final String ellipsis;
- /** true if we should escape for html */
- protected final boolean escape;
+ /** text that will appear before highlighted terms */
+ protected final String preTag;
+ /** text that will appear after highlighted terms */
+ protected final String postTag;
+ /** text that will appear between two unconnected passages */
+ protected final String ellipsis;
+ /** true if we should escape for html */
+ protected final boolean escape;
- /**
- * Creates a new DefaultPassageFormatter with the default tags.
- */
- public DefaultPassageFormatter() {
- this("<b>", "</b>", "... ", false);
- }
+ /**
+ * Creates a new DefaultPassageFormatter with the default tags.
+ */
+ public DefaultPassageFormatter() {
+ this("<b>", "</b>", "... ", false);
+ }
- /**
- * Creates a new DefaultPassageFormatter with custom tags.
- * @param preTag text which should appear before a highlighted term.
- * @param postTag text which should appear after a highlighted term.
- * @param ellipsis text which should be used to connect two unconnected passages.
- * @param escape true if text should be html-escaped
- */
- public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
- if (preTag == null || postTag == null || ellipsis == null) {
- throw new NullPointerException();
- }
- this.preTag = preTag;
- this.postTag = postTag;
- this.ellipsis = ellipsis;
- this.escape = escape;
+ /**
+ * Creates a new DefaultPassageFormatter with custom tags.
+ *
+ * @param preTag text which should appear before a highlighted term.
+ * @param postTag text which should appear after a highlighted term.
+ * @param ellipsis text which should be used to connect two unconnected passages.
+ * @param escape true if text should be html-escaped
+ */
+ public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
+ if (preTag == null || postTag == null || ellipsis == null) {
+ throw new NullPointerException();
}
+ this.preTag = preTag;
+ this.postTag = postTag;
+ this.ellipsis = ellipsis;
+ this.escape = escape;
+ }
- @Override
- public String format(Passage passages[], String content) {
- StringBuilder sb = new StringBuilder();
- int pos = 0;
- for (Passage passage : passages) {
- // don't add ellipsis if its the first one, or if its connected.
- if (passage.getStartOffset() > pos && pos > 0) {
- sb.append(ellipsis);
- }
- pos = passage.getStartOffset();
- for (int i = 0; i < passage.getNumMatches(); i++) {
- int start = passage.getMatchStarts()[i];
- int end = passage.getMatchEnds()[i];
- // its possible to have overlapping terms
- if (start > pos) {
- append(sb, content, pos, start);
- }
- if (end > pos) {
- sb.append(preTag);
- append(sb, content, Math.max(pos, start), end);
- sb.append(postTag);
- pos = end;
- }
- }
- // its possible a "term" from the analyzer could span a sentence boundary.
- append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
- pos = passage.getEndOffset();
+ @Override
+ public String format(Passage passages[], String content) {
+ StringBuilder sb = new StringBuilder();
+ int pos = 0;
+ for (Passage passage : passages) {
+ // don't add ellipsis if its the first one, or if its connected.
+ if (passage.getStartOffset() > pos && pos > 0) {
+ sb.append(ellipsis);
+ }
+ pos = passage.getStartOffset();
+ for (int i = 0; i < passage.getNumMatches(); i++) {
+ int start = passage.getMatchStarts()[i];
+ int end = passage.getMatchEnds()[i];
+ // its possible to have overlapping terms
+ if (start > pos) {
+ append(sb, content, pos, start);
+ }
+ if (end > pos) {
+ sb.append(preTag);
+ append(sb, content, Math.max(pos, start), end);
+ sb.append(postTag);
+ pos = end;
}
- return sb.toString();
+ }
+ // its possible a "term" from the analyzer could span a sentence boundary.
+ append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
+ pos = passage.getEndOffset();
}
+ return sb.toString();
+ }
- /**
- * Appends original text to the response.
- * @param dest resulting text, possibly transformed or encoded
- * @param content original text content
- * @param start index of the first character in content
- * @param end index of the character following the last character in content
- */
- protected void append(StringBuilder dest, String content, int start, int end) {
- if (escape) {
- // note: these are the rules from owasp.org
- for (int i = start; i < end; i++) {
- char ch = content.charAt(i);
- switch(ch) {
- case '&':
- dest.append("&amp;");
- break;
- case '<':
- dest.append("&lt;");
- break;
- case '>':
- dest.append("&gt;");
- break;
- case '"':
- dest.append("&quot;");
- break;
- case '\'':
- dest.append("&#x27;");
- break;
- case '/':
- dest.append("&#x2F;");
- break;
- default:
- if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
- dest.append(ch);
- } else if (ch < 0xff) {
- dest.append("&#");
- dest.append((int)ch);
- dest.append(";");
- } else {
- dest.append(ch);
- }
- }
+ /**
+ * Appends original text to the response.
+ *
+ * @param dest resulting text, possibly transformed or encoded
+ * @param content original text content
+ * @param start index of the first character in content
+ * @param end index of the character following the last character in content
+ */
+ protected void append(StringBuilder dest, String content, int start, int end) {
+ if (escape) {
+ // note: these are the rules from owasp.org
+ for (int i = start; i < end; i++) {
+ char ch = content.charAt(i);
+ switch (ch) {
+ case '&':
+ dest.append("&amp;");
+ break;
+ case '<':
+ dest.append("&lt;");
+ break;
+ case '>':
+ dest.append("&gt;");
+ break;
+ case '"':
+ dest.append("&quot;");
+ break;
+ case '\'':
+ dest.append("&#x27;");
+ break;
+ case '/':
+ dest.append("&#x2F;");
+ break;
+ default:
+ if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
+ dest.append(ch);
+ } else if (ch < 0xff) {
+ dest.append("&#");
+ dest.append((int) ch);
+ dest.append(";");
+ } else {
+ dest.append(ch);
}
- } else {
- dest.append(content, start, end);
}
+ }
+ } else {
+ dest.append(content, start, end);
}
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/06ee34c6/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
index a131d86..d64b96e 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
@@ -23,7 +23,7 @@ import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
/**
- * Represents a passage (typically a sentence of the document).
+ * Represents a passage (typically a sentence of the document).
* <p>
* A passage contains {@link #getNumMatches} highlights from the query,
* and the offsets and query terms that correspond with each match.
@@ -31,149 +31,151 @@ import org.apache.lucene.util.RamUsageEstimator;
* @lucene.experimental
*/
public class Passage {
- private int startOffset = -1;
- private int endOffset = -1;
- private float score = 0.0f;
-
- private int[] matchStarts = new int[8];
- private int[] matchEnds = new int[8];
- private BytesRef[] matchTerms = new BytesRef[8];
- private int numMatches = 0;
-
- /** @lucene.internal */
- public void addMatch(int startOffset, int endOffset, BytesRef term) {
- assert startOffset >= this.startOffset && startOffset <= this.endOffset;
- if (numMatches == matchStarts.length) {
- int newLength = ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
- int newMatchStarts[] = new int[newLength];
- int newMatchEnds[] = new int[newLength];
- BytesRef newMatchTerms[] = new BytesRef[newLength];
- System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
- System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
- System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
- matchStarts = newMatchStarts;
- matchEnds = newMatchEnds;
- matchTerms = newMatchTerms;
- }
- assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
- matchStarts[numMatches] = startOffset;
- matchEnds[numMatches] = endOffset;
- matchTerms[numMatches] = term;
- numMatches++;
- }
-
- /** @lucene.internal */
- public void sort() {
- final int starts[] = matchStarts;
- final int ends[] = matchEnds;
- final BytesRef terms[] = matchTerms;
- new InPlaceMergeSorter() {
- @Override
- protected void swap(int i, int j) {
- int temp = starts[i];
- starts[i] = starts[j];
- starts[j] = temp;
-
- temp = ends[i];
- ends[i] = ends[j];
- ends[j] = temp;
-
- BytesRef tempTerm = terms[i];
- terms[i] = terms[j];
- terms[j] = tempTerm;
- }
-
- @Override
- protected int compare(int i, int j) {
- return Integer.compare(starts[i], starts[j]);
- }
-
- }.sort(0, numMatches);
- }
-
- /** @lucene.internal */
- public void reset() {
- startOffset = endOffset = -1;
- score = 0.0f;
- numMatches = 0;
- }
-
- /**
- * Start offset of this passage.
- * @return start index (inclusive) of the passage in the
- * original content: always &gt;= 0.
- */
- public int getStartOffset() {
- return startOffset;
- }
-
- /**
- * End offset of this passage.
- * @return end index (exclusive) of the passage in the
- * original content: always &gt;= {@link #getStartOffset()}
- */
- public int getEndOffset() {
- return endOffset;
- }
-
- /**
- * Passage's score.
- */
- public float getScore() {
- return score;
- }
-
- /**
- * Number of term matches available in
- * {@link #getMatchStarts}, {@link #getMatchEnds},
- * {@link #getMatchTerms}
- */
- public int getNumMatches() {
- return numMatches;
- }
-
- /**
- * Start offsets of the term matches, in increasing order.
- * <p>
- * Only {@link #getNumMatches} are valid. Note that these
- * offsets are absolute (not relative to {@link #getStartOffset()}).
- */
- public int[] getMatchStarts() {
- return matchStarts;
- }
-
- /**
- * End offsets of the term matches, corresponding with {@link #getMatchStarts}.
- * <p>
- * Only {@link #getNumMatches} are valid. Note that its possible that an end offset
- * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
- * Analyzer produced a term which spans a passage boundary.
- */
- public int[] getMatchEnds() {
- return matchEnds;
- }
-
- /**
- * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
- * <p>
- * Only {@link #getNumMatches()} are valid.
- */
- public BytesRef[] getMatchTerms() {
- return matchTerms;
- }
-
- /** @lucene.internal */
- public void setStartOffset(int startOffset) {
- this.startOffset = startOffset;
- }
-
- /** @lucene.internal */
- public void setEndOffset(int endOffset) {
- this.endOffset = endOffset;
- }
-
- /** @lucene.internal */
- public void setScore(float score) {
- this.score = score;
+ private int startOffset = -1;
+ private int endOffset = -1;
+ private float score = 0.0f;
+
+ private int[] matchStarts = new int[8];
+ private int[] matchEnds = new int[8];
+ private BytesRef[] matchTerms = new BytesRef[8];
+ private int numMatches = 0;
+
+ /** @lucene.internal */
+ public void addMatch(int startOffset, int endOffset, BytesRef term) {
+ assert startOffset >= this.startOffset && startOffset <= this.endOffset;
+ if (numMatches == matchStarts.length) {
+ int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ int newMatchStarts[] = new int[newLength];
+ int newMatchEnds[] = new int[newLength];
+ BytesRef newMatchTerms[] = new BytesRef[newLength];
+ System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
+ System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
+ System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
+ matchStarts = newMatchStarts;
+ matchEnds = newMatchEnds;
+ matchTerms = newMatchTerms;
}
+ assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
+ matchStarts[numMatches] = startOffset;
+ matchEnds[numMatches] = endOffset;
+ matchTerms[numMatches] = term;
+ numMatches++;
+ }
+
+ /** @lucene.internal */
+ public void sort() {
+ final int starts[] = matchStarts;
+ final int ends[] = matchEnds;
+ final BytesRef terms[] = matchTerms;
+ new InPlaceMergeSorter() {
+ @Override
+ protected void swap(int i, int j) {
+ int temp = starts[i];
+ starts[i] = starts[j];
+ starts[j] = temp;
+
+ temp = ends[i];
+ ends[i] = ends[j];
+ ends[j] = temp;
+
+ BytesRef tempTerm = terms[i];
+ terms[i] = terms[j];
+ terms[j] = tempTerm;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return Integer.compare(starts[i], starts[j]);
+ }
+
+ }.sort(0, numMatches);
+ }
+
+ /** @lucene.internal */
+ public void reset() {
+ startOffset = endOffset = -1;
+ score = 0.0f;
+ numMatches = 0;
+ }
+
+ /**
+ * Start offset of this passage.
+ *
+ * @return start index (inclusive) of the passage in the
+ * original content: always &gt;= 0.
+ */
+ public int getStartOffset() {
+ return startOffset;
+ }
+
+ /**
+ * End offset of this passage.
+ *
+ * @return end index (exclusive) of the passage in the
+ * original content: always &gt;= {@link #getStartOffset()}
+ */
+ public int getEndOffset() {
+ return endOffset;
+ }
+
+ /**
+ * Passage's score.
+ */
+ public float getScore() {
+ return score;
+ }
+
+ /**
+ * Number of term matches available in
+ * {@link #getMatchStarts}, {@link #getMatchEnds},
+ * {@link #getMatchTerms}
+ */
+ public int getNumMatches() {
+ return numMatches;
+ }
+
+ /**
+ * Start offsets of the term matches, in increasing order.
+ * <p>
+ * Only {@link #getNumMatches} are valid. Note that these
+ * offsets are absolute (not relative to {@link #getStartOffset()}).
+ */
+ public int[] getMatchStarts() {
+ return matchStarts;
+ }
+
+ /**
+ * End offsets of the term matches, corresponding with {@link #getMatchStarts}.
+ * <p>
+ * Only {@link #getNumMatches} are valid. Note that its possible that an end offset
+ * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
+ * Analyzer produced a term which spans a passage boundary.
+ */
+ public int[] getMatchEnds() {
+ return matchEnds;
+ }
+
+ /**
+ * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
+ * <p>
+ * Only {@link #getNumMatches()} are valid.
+ */
+ public BytesRef[] getMatchTerms() {
+ return matchTerms;
+ }
+
+ /** @lucene.internal */
+ public void setStartOffset(int startOffset) {
+ this.startOffset = startOffset;
+ }
+
+ /** @lucene.internal */
+ public void setEndOffset(int endOffset) {
+ this.endOffset = endOffset;
+ }
+
+ /** @lucene.internal */
+ public void setScore(float score) {
+ this.score = score;
+ }
}