You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2016/11/17 16:25:44 UTC
[1/2] lucene-solr:branch_6x: LUCENE-7559: UnifiedHighlighter:
Increase Passage visibility
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x e525cca01 -> 06ee34c68
LUCENE-7559: UnifiedHighlighter: Increase Passage visibility
(cherry picked from commit c51e890)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/23b8bb66
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/23b8bb66
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/23b8bb66
Branch: refs/heads/branch_6x
Commit: 23b8bb669359008d19fd19d744ba42ef8870d25c
Parents: e525cca
Author: David Smiley <ds...@apache.org>
Authored: Thu Nov 17 11:19:54 2016 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Thu Nov 17 11:25:12 2016 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 ++
.../uhighlight/AnalysisOffsetStrategy.java | 3 ++
.../uhighlight/DefaultPassageFormatter.java | 14 ++++----
.../search/uhighlight/FieldHighlighter.java | 30 ++++++++--------
.../lucene/search/uhighlight/OffsetsEnum.java | 8 ++---
.../lucene/search/uhighlight/Passage.java | 38 ++++++++++++++------
.../uhighlight/TokenStreamOffsetStrategy.java | 7 ++--
.../uhighlight/TestUnifiedHighlighterMTQ.java | 14 ++++----
8 files changed, 70 insertions(+), 47 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index c252da8..6039562 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,9 @@ Other
* LUCENE-7534: fix smokeTestRelease.py to run on Cygwin (Mikhail Khludnev)
+* LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to
+ be customized. (David Smiley)
+
Build
* LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
index e9db77c..162d270 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
@@ -75,6 +75,9 @@ public abstract class AnalysisOffsetStrategy extends FieldOffsetStrategy {
*
* @lucene.internal
*/
+ // TODO we could make this go away. MemoryIndexOffsetStrategy could simply split and analyze each value into the
+ // MemoryIndex. TokenStreamOffsetStrategy's hack TokenStreamPostingsEnum could incorporate this logic,
+ // albeit with less code, less hack.
private static final class MultiValueTokenStream extends TokenFilter {
private final String fieldName;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
index 77612a7..bc27a43 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
@@ -63,13 +63,13 @@ public class DefaultPassageFormatter extends PassageFormatter {
int pos = 0;
for (Passage passage : passages) {
// don't add ellipsis if its the first one, or if its connected.
- if (passage.startOffset > pos && pos > 0) {
+ if (passage.getStartOffset() > pos && pos > 0) {
sb.append(ellipsis);
}
- pos = passage.startOffset;
- for (int i = 0; i < passage.numMatches; i++) {
- int start = passage.matchStarts[i];
- int end = passage.matchEnds[i];
+ pos = passage.getStartOffset();
+ for (int i = 0; i < passage.getNumMatches(); i++) {
+ int start = passage.getMatchStarts()[i];
+ int end = passage.getMatchEnds()[i];
// its possible to have overlapping terms
if (start > pos) {
append(sb, content, pos, start);
@@ -82,8 +82,8 @@ public class DefaultPassageFormatter extends PassageFormatter {
}
}
// its possible a "term" from the analyzer could span a sentence boundary.
- append(sb, content, pos, Math.max(pos, passage.endOffset));
- pos = passage.endOffset;
+ append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
+ pos = passage.getEndOffset();
}
return sb.toString();
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
index 7c8f048..1caa739 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
@@ -117,9 +117,9 @@ public class FieldHighlighter {
break;
}
Passage passage = new Passage();
- passage.score = Float.NaN;
- passage.startOffset = pos;
- passage.endOffset = next;
+ passage.setScore(Float.NaN);
+ passage.setStartOffset(pos);
+ passage.setEndOffset(next);
passages.add(passage);
pos = next;
}
@@ -145,12 +145,12 @@ public class FieldHighlighter {
offsetsEnumQueue.add(new OffsetsEnum(null, EMPTY)); // a sentinel for termination
PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
- if (left.score < right.score) {
+ if (left.getScore() < right.getScore()) {
return -1;
- } else if (left.score > right.score) {
+ } else if (left.getScore() > right.getScore()) {
return 1;
} else {
- return left.startOffset - right.startOffset;
+ return left.getStartOffset() - right.getStartOffset();
}
});
Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue.
@@ -170,12 +170,12 @@ public class FieldHighlighter {
continue;
}
// See if this term should be part of a new passage.
- if (start >= passage.endOffset) {
- if (passage.startOffset >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
+ if (start >= passage.getEndOffset()) {
+ if (passage.getStartOffset() >= 0) { // true if this passage has terms; otherwise couldn't find any (yet)
// finalize passage
- passage.score *= scorer.norm(passage.startOffset);
+ passage.setScore(passage.getScore() * scorer.norm(passage.getStartOffset()));
// new sentence: first add 'passage' to queue
- if (passageQueue.size() == maxPassages && passage.score < passageQueue.peek().score) {
+ if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) {
passage.reset(); // can't compete, just reset it
} else {
passageQueue.offer(passage);
@@ -192,8 +192,8 @@ public class FieldHighlighter {
break;
}
// advance breakIterator
- passage.startOffset = Math.max(breakIterator.preceding(start + 1), 0);
- passage.endOffset = Math.min(breakIterator.following(start), contentLength);
+ passage.setStartOffset(Math.max(breakIterator.preceding(start + 1), 0));
+ passage.setEndOffset(Math.min(breakIterator.following(start), contentLength));
}
// Add this term to the passage.
int tf = 0;
@@ -209,12 +209,12 @@ public class FieldHighlighter {
off.nextPosition();
start = off.startOffset();
end = off.endOffset();
- if (start >= passage.endOffset || end > contentLength) { // it's beyond this passage
+ if (start >= passage.getEndOffset() || end > contentLength) { // it's beyond this passage
offsetsEnumQueue.offer(off);
break;
}
}
- passage.score += off.weight * scorer.tf(tf, passage.endOffset - passage.startOffset);
+ passage.setScore(passage.getScore() + off.weight * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
}
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
@@ -222,7 +222,7 @@ public class FieldHighlighter {
p.sort();
}
// sort in ascending order
- Arrays.sort(passages, (left, right) -> left.startOffset - right.startOffset);
+ Arrays.sort(passages, (left, right) -> left.getStartOffset() - right.getStartOffset());
return passages;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
index cbaeb90..db1ea1f 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@@ -66,9 +66,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
}
BytesRef getTerm() throws IOException {
- // the dp.getPayload thing is a hack -- see MultiTermHighlighting
- return term != null ? term : postingsEnum.getPayload();
- // We don't deepcopy() because in this hack we know we don't have to.
+ // TODO TokenStreamOffsetStrategy could override OffsetsEnum; then remove this hack here
+ return term != null ? term : postingsEnum.getPayload(); // abusing payload like this is a total hack!
}
boolean hasMorePositions() throws IOException {
@@ -91,7 +90,8 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
@Override
public void close() throws IOException {
- if (postingsEnum instanceof Closeable) { // the one in MultiTermHighlighting is.
+ // TODO TokenStreamOffsetStrategy could override OffsetsEnum; then this base impl would be no-op.
+ if (postingsEnum instanceof Closeable) {
((Closeable) postingsEnum).close();
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
index de37d5d..a131d86 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
@@ -30,16 +30,17 @@ import org.apache.lucene.util.RamUsageEstimator;
*
* @lucene.experimental
*/
-public final class Passage {
- int startOffset = -1;
- int endOffset = -1;
- float score = 0.0f;
+public class Passage {
+ private int startOffset = -1;
+ private int endOffset = -1;
+ private float score = 0.0f;
- int matchStarts[] = new int[8];
- int matchEnds[] = new int[8];
- BytesRef matchTerms[] = new BytesRef[8];
- int numMatches = 0;
+ private int[] matchStarts = new int[8];
+ private int[] matchEnds = new int[8];
+ private BytesRef[] matchTerms = new BytesRef[8];
+ private int numMatches = 0;
+ /** @lucene.internal */
public void addMatch(int startOffset, int endOffset, BytesRef term) {
assert startOffset >= this.startOffset && startOffset <= this.endOffset;
if (numMatches == matchStarts.length) {
@@ -61,7 +62,8 @@ public final class Passage {
numMatches++;
}
- void sort() {
+ /** @lucene.internal */
+ public void sort() {
final int starts[] = matchStarts;
final int ends[] = matchEnds;
final BytesRef terms[] = matchTerms;
@@ -89,7 +91,8 @@ public final class Passage {
}.sort(0, numMatches);
}
- void reset() {
+ /** @lucene.internal */
+ public void reset() {
startOffset = endOffset = -1;
score = 0.0f;
numMatches = 0;
@@ -158,4 +161,19 @@ public final class Passage {
public BytesRef[] getMatchTerms() {
return matchTerms;
}
+
+ /** @lucene.internal */
+ public void setStartOffset(int startOffset) {
+ this.startOffset = startOffset;
+ }
+
+ /** @lucene.internal */
+ public void setEndOffset(int endOffset) {
+ this.endOffset = endOffset;
+ }
+
+ /** @lucene.internal */
+ public void setScore(float score) {
+ this.score = score;
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
index 966eeef..28eb6b1 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
@@ -69,10 +69,8 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
return Collections.singletonList(new OffsetsEnum(null, mtqPostingsEnum));
}
- // but this would have a performance cost for likely little gain in the user experience, it
- // would only serve to make this method less bogus.
- // instead, we always return freq() = Integer.MAX_VALUE and let the highlighter terminate based on offset...
- // TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl?
+ // See class javadocs.
+ // TODO: DWS perhaps instead OffsetsEnum could become abstract and this would be an impl? See TODOs in OffsetsEnum.
private static class TokenStreamPostingsEnum extends PostingsEnum implements Closeable {
TokenStream stream; // becomes null when closed
final CharacterRunAutomaton[] matchers;
@@ -134,6 +132,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
return currentEndOffset;
}
+ // TOTAL HACK; used in OffsetsEnum.getTerm()
@Override
public BytesRef getPayload() throws IOException {
if (matchDescriptions[currentMatch] == null) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23b8bb66/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
index 23eefdf..5435b11 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -697,13 +697,13 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
int pos = 0;
for (Passage passage : passages) {
// don't add ellipsis if its the first one, or if its connected.
- if (passage.startOffset > pos && pos > 0) {
+ if (passage.getStartOffset() > pos && pos > 0) {
sb.append("... ");
}
- pos = passage.startOffset;
- for (int i = 0; i < passage.numMatches; i++) {
- int start = passage.matchStarts[i];
- int end = passage.matchEnds[i];
+ pos = passage.getStartOffset();
+ for (int i = 0; i < passage.getNumMatches(); i++) {
+ int start = passage.getMatchStarts()[i];
+ int end = passage.getMatchEnds()[i];
// its possible to have overlapping terms
if (start > pos) {
sb.append(content, pos, start);
@@ -719,8 +719,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
}
}
// its possible a "term" from the analyzer could span a sentence boundary.
- sb.append(content, pos, Math.max(pos, passage.endOffset));
- pos = passage.endOffset;
+ sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
+ pos = passage.getEndOffset();
}
return sb.toString();
}
[2/2] lucene-solr:branch_6x: LUCENE-7559: fix indentation of entire
file
Posted by ds...@apache.org.
LUCENE-7559: fix indentation of entire file
(cherry picked from commit cbc8392)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/06ee34c6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/06ee34c6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/06ee34c6
Branch: refs/heads/branch_6x
Commit: 06ee34c68225fd904b8909af44173574890d881e
Parents: 23b8bb6
Author: David Smiley <ds...@apache.org>
Authored: Thu Nov 17 11:23:40 2016 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Thu Nov 17 11:25:36 2016 -0500
----------------------------------------------------------------------
.../uhighlight/DefaultPassageFormatter.java | 204 ++++++-------
.../lucene/search/uhighlight/Passage.java | 292 ++++++++++---------
2 files changed, 250 insertions(+), 246 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/06ee34c6/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
index bc27a43..62d58df 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java
@@ -24,115 +24,117 @@ package org.apache.lucene.search.uhighlight;
* ellipses between unconnected passages.
*/
public class DefaultPassageFormatter extends PassageFormatter {
- /** text that will appear before highlighted terms */
- protected final String preTag;
- /** text that will appear after highlighted terms */
- protected final String postTag;
- /** text that will appear between two unconnected passages */
- protected final String ellipsis;
- /** true if we should escape for html */
- protected final boolean escape;
+ /** text that will appear before highlighted terms */
+ protected final String preTag;
+ /** text that will appear after highlighted terms */
+ protected final String postTag;
+ /** text that will appear between two unconnected passages */
+ protected final String ellipsis;
+ /** true if we should escape for html */
+ protected final boolean escape;
- /**
- * Creates a new DefaultPassageFormatter with the default tags.
- */
- public DefaultPassageFormatter() {
- this("<b>", "</b>", "... ", false);
- }
+ /**
+ * Creates a new DefaultPassageFormatter with the default tags.
+ */
+ public DefaultPassageFormatter() {
+ this("<b>", "</b>", "... ", false);
+ }
- /**
- * Creates a new DefaultPassageFormatter with custom tags.
- * @param preTag text which should appear before a highlighted term.
- * @param postTag text which should appear after a highlighted term.
- * @param ellipsis text which should be used to connect two unconnected passages.
- * @param escape true if text should be html-escaped
- */
- public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
- if (preTag == null || postTag == null || ellipsis == null) {
- throw new NullPointerException();
- }
- this.preTag = preTag;
- this.postTag = postTag;
- this.ellipsis = ellipsis;
- this.escape = escape;
+ /**
+ * Creates a new DefaultPassageFormatter with custom tags.
+ *
+ * @param preTag text which should appear before a highlighted term.
+ * @param postTag text which should appear after a highlighted term.
+ * @param ellipsis text which should be used to connect two unconnected passages.
+ * @param escape true if text should be html-escaped
+ */
+ public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
+ if (preTag == null || postTag == null || ellipsis == null) {
+ throw new NullPointerException();
}
+ this.preTag = preTag;
+ this.postTag = postTag;
+ this.ellipsis = ellipsis;
+ this.escape = escape;
+ }
- @Override
- public String format(Passage passages[], String content) {
- StringBuilder sb = new StringBuilder();
- int pos = 0;
- for (Passage passage : passages) {
- // don't add ellipsis if its the first one, or if its connected.
- if (passage.getStartOffset() > pos && pos > 0) {
- sb.append(ellipsis);
- }
- pos = passage.getStartOffset();
- for (int i = 0; i < passage.getNumMatches(); i++) {
- int start = passage.getMatchStarts()[i];
- int end = passage.getMatchEnds()[i];
- // its possible to have overlapping terms
- if (start > pos) {
- append(sb, content, pos, start);
- }
- if (end > pos) {
- sb.append(preTag);
- append(sb, content, Math.max(pos, start), end);
- sb.append(postTag);
- pos = end;
- }
- }
- // its possible a "term" from the analyzer could span a sentence boundary.
- append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
- pos = passage.getEndOffset();
+ @Override
+ public String format(Passage passages[], String content) {
+ StringBuilder sb = new StringBuilder();
+ int pos = 0;
+ for (Passage passage : passages) {
+ // don't add ellipsis if its the first one, or if its connected.
+ if (passage.getStartOffset() > pos && pos > 0) {
+ sb.append(ellipsis);
+ }
+ pos = passage.getStartOffset();
+ for (int i = 0; i < passage.getNumMatches(); i++) {
+ int start = passage.getMatchStarts()[i];
+ int end = passage.getMatchEnds()[i];
+ // its possible to have overlapping terms
+ if (start > pos) {
+ append(sb, content, pos, start);
+ }
+ if (end > pos) {
+ sb.append(preTag);
+ append(sb, content, Math.max(pos, start), end);
+ sb.append(postTag);
+ pos = end;
}
- return sb.toString();
+ }
+ // its possible a "term" from the analyzer could span a sentence boundary.
+ append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
+ pos = passage.getEndOffset();
}
+ return sb.toString();
+ }
- /**
- * Appends original text to the response.
- * @param dest resulting text, possibly transformed or encoded
- * @param content original text content
- * @param start index of the first character in content
- * @param end index of the character following the last character in content
- */
- protected void append(StringBuilder dest, String content, int start, int end) {
- if (escape) {
- // note: these are the rules from owasp.org
- for (int i = start; i < end; i++) {
- char ch = content.charAt(i);
- switch(ch) {
- case '&':
- dest.append("&amp;");
- break;
- case '<':
- dest.append("&lt;");
- break;
- case '>':
- dest.append("&gt;");
- break;
- case '"':
- dest.append("&quot;");
- break;
- case '\'':
- dest.append("&#x27;");
- break;
- case '/':
- dest.append("&#x2F;");
- break;
- default:
- if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
- dest.append(ch);
- } else if (ch < 0xff) {
- dest.append("&#");
- dest.append((int)ch);
- dest.append(";");
- } else {
- dest.append(ch);
- }
- }
+ /**
+ * Appends original text to the response.
+ *
+ * @param dest resulting text, possibly transformed or encoded
+ * @param content original text content
+ * @param start index of the first character in content
+ * @param end index of the character following the last character in content
+ */
+ protected void append(StringBuilder dest, String content, int start, int end) {
+ if (escape) {
+ // note: these are the rules from owasp.org
+ for (int i = start; i < end; i++) {
+ char ch = content.charAt(i);
+ switch (ch) {
+ case '&':
+ dest.append("&amp;");
+ break;
+ case '<':
+ dest.append("&lt;");
+ break;
+ case '>':
+ dest.append("&gt;");
+ break;
+ case '"':
+ dest.append("&quot;");
+ break;
+ case '\'':
+ dest.append("&#x27;");
+ break;
+ case '/':
+ dest.append("&#x2F;");
+ break;
+ default:
+ if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
+ dest.append(ch);
+ } else if (ch < 0xff) {
+ dest.append("&#");
+ dest.append((int) ch);
+ dest.append(";");
+ } else {
+ dest.append(ch);
}
- } else {
- dest.append(content, start, end);
}
+ }
+ } else {
+ dest.append(content, start, end);
}
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/06ee34c6/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
index a131d86..d64b96e 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/Passage.java
@@ -23,7 +23,7 @@ import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.RamUsageEstimator;
/**
- * Represents a passage (typically a sentence of the document).
+ * Represents a passage (typically a sentence of the document).
* <p>
* A passage contains {@link #getNumMatches} highlights from the query,
* and the offsets and query terms that correspond with each match.
@@ -31,149 +31,151 @@ import org.apache.lucene.util.RamUsageEstimator;
* @lucene.experimental
*/
public class Passage {
- private int startOffset = -1;
- private int endOffset = -1;
- private float score = 0.0f;
-
- private int[] matchStarts = new int[8];
- private int[] matchEnds = new int[8];
- private BytesRef[] matchTerms = new BytesRef[8];
- private int numMatches = 0;
-
- /** @lucene.internal */
- public void addMatch(int startOffset, int endOffset, BytesRef term) {
- assert startOffset >= this.startOffset && startOffset <= this.endOffset;
- if (numMatches == matchStarts.length) {
- int newLength = ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
- int newMatchStarts[] = new int[newLength];
- int newMatchEnds[] = new int[newLength];
- BytesRef newMatchTerms[] = new BytesRef[newLength];
- System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
- System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
- System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
- matchStarts = newMatchStarts;
- matchEnds = newMatchEnds;
- matchTerms = newMatchTerms;
- }
- assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
- matchStarts[numMatches] = startOffset;
- matchEnds[numMatches] = endOffset;
- matchTerms[numMatches] = term;
- numMatches++;
- }
-
- /** @lucene.internal */
- public void sort() {
- final int starts[] = matchStarts;
- final int ends[] = matchEnds;
- final BytesRef terms[] = matchTerms;
- new InPlaceMergeSorter() {
- @Override
- protected void swap(int i, int j) {
- int temp = starts[i];
- starts[i] = starts[j];
- starts[j] = temp;
-
- temp = ends[i];
- ends[i] = ends[j];
- ends[j] = temp;
-
- BytesRef tempTerm = terms[i];
- terms[i] = terms[j];
- terms[j] = tempTerm;
- }
-
- @Override
- protected int compare(int i, int j) {
- return Integer.compare(starts[i], starts[j]);
- }
-
- }.sort(0, numMatches);
- }
-
- /** @lucene.internal */
- public void reset() {
- startOffset = endOffset = -1;
- score = 0.0f;
- numMatches = 0;
- }
-
- /**
- * Start offset of this passage.
- * @return start index (inclusive) of the passage in the
- * original content: always &gt;= 0.
- */
- public int getStartOffset() {
- return startOffset;
- }
-
- /**
- * End offset of this passage.
- * @return end index (exclusive) of the passage in the
- * original content: always &gt;= {@link #getStartOffset()}
- */
- public int getEndOffset() {
- return endOffset;
- }
-
- /**
- * Passage's score.
- */
- public float getScore() {
- return score;
- }
-
- /**
- * Number of term matches available in
- * {@link #getMatchStarts}, {@link #getMatchEnds},
- * {@link #getMatchTerms}
- */
- public int getNumMatches() {
- return numMatches;
- }
-
- /**
- * Start offsets of the term matches, in increasing order.
- * <p>
- * Only {@link #getNumMatches} are valid. Note that these
- * offsets are absolute (not relative to {@link #getStartOffset()}).
- */
- public int[] getMatchStarts() {
- return matchStarts;
- }
-
- /**
- * End offsets of the term matches, corresponding with {@link #getMatchStarts}.
- * <p>
- * Only {@link #getNumMatches} are valid. Note that its possible that an end offset
- * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
- * Analyzer produced a term which spans a passage boundary.
- */
- public int[] getMatchEnds() {
- return matchEnds;
- }
-
- /**
- * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
- * <p>
- * Only {@link #getNumMatches()} are valid.
- */
- public BytesRef[] getMatchTerms() {
- return matchTerms;
- }
-
- /** @lucene.internal */
- public void setStartOffset(int startOffset) {
- this.startOffset = startOffset;
- }
-
- /** @lucene.internal */
- public void setEndOffset(int endOffset) {
- this.endOffset = endOffset;
- }
-
- /** @lucene.internal */
- public void setScore(float score) {
- this.score = score;
+ private int startOffset = -1;
+ private int endOffset = -1;
+ private float score = 0.0f;
+
+ private int[] matchStarts = new int[8];
+ private int[] matchEnds = new int[8];
+ private BytesRef[] matchTerms = new BytesRef[8];
+ private int numMatches = 0;
+
+ /** @lucene.internal */
+ public void addMatch(int startOffset, int endOffset, BytesRef term) {
+ assert startOffset >= this.startOffset && startOffset <= this.endOffset;
+ if (numMatches == matchStarts.length) {
+ int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ int newMatchStarts[] = new int[newLength];
+ int newMatchEnds[] = new int[newLength];
+ BytesRef newMatchTerms[] = new BytesRef[newLength];
+ System.arraycopy(matchStarts, 0, newMatchStarts, 0, numMatches);
+ System.arraycopy(matchEnds, 0, newMatchEnds, 0, numMatches);
+ System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
+ matchStarts = newMatchStarts;
+ matchEnds = newMatchEnds;
+ matchTerms = newMatchTerms;
}
+ assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
+ matchStarts[numMatches] = startOffset;
+ matchEnds[numMatches] = endOffset;
+ matchTerms[numMatches] = term;
+ numMatches++;
+ }
+
+ /** @lucene.internal */
+ public void sort() {
+ final int starts[] = matchStarts;
+ final int ends[] = matchEnds;
+ final BytesRef terms[] = matchTerms;
+ new InPlaceMergeSorter() {
+ @Override
+ protected void swap(int i, int j) {
+ int temp = starts[i];
+ starts[i] = starts[j];
+ starts[j] = temp;
+
+ temp = ends[i];
+ ends[i] = ends[j];
+ ends[j] = temp;
+
+ BytesRef tempTerm = terms[i];
+ terms[i] = terms[j];
+ terms[j] = tempTerm;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return Integer.compare(starts[i], starts[j]);
+ }
+
+ }.sort(0, numMatches);
+ }
+
+ /** @lucene.internal */
+ public void reset() {
+ startOffset = endOffset = -1;
+ score = 0.0f;
+ numMatches = 0;
+ }
+
+ /**
+ * Start offset of this passage.
+ *
+ * @return start index (inclusive) of the passage in the
+ * original content: always &gt;= 0.
+ */
+ public int getStartOffset() {
+ return startOffset;
+ }
+
+ /**
+ * End offset of this passage.
+ *
+ * @return end index (exclusive) of the passage in the
+ * original content: always &gt;= {@link #getStartOffset()}
+ */
+ public int getEndOffset() {
+ return endOffset;
+ }
+
+ /**
+ * Passage's score.
+ */
+ public float getScore() {
+ return score;
+ }
+
+ /**
+ * Number of term matches available in
+ * {@link #getMatchStarts}, {@link #getMatchEnds},
+ * {@link #getMatchTerms}
+ */
+ public int getNumMatches() {
+ return numMatches;
+ }
+
+ /**
+ * Start offsets of the term matches, in increasing order.
+ * <p>
+ * Only {@link #getNumMatches} are valid. Note that these
+ * offsets are absolute (not relative to {@link #getStartOffset()}).
+ */
+ public int[] getMatchStarts() {
+ return matchStarts;
+ }
+
+ /**
+ * End offsets of the term matches, corresponding with {@link #getMatchStarts}.
+ * <p>
+ * Only {@link #getNumMatches} are valid. Note that its possible that an end offset
+ * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
+ * Analyzer produced a term which spans a passage boundary.
+ */
+ public int[] getMatchEnds() {
+ return matchEnds;
+ }
+
+ /**
+ * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
+ * <p>
+ * Only {@link #getNumMatches()} are valid.
+ */
+ public BytesRef[] getMatchTerms() {
+ return matchTerms;
+ }
+
+ /** @lucene.internal */
+ public void setStartOffset(int startOffset) {
+ this.startOffset = startOffset;
+ }
+
+ /** @lucene.internal */
+ public void setEndOffset(int endOffset) {
+ this.endOffset = endOffset;
+ }
+
+ /** @lucene.internal */
+ public void setScore(float score) {
+ this.score = score;
+ }
}