You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ma...@apache.org on 2008/11/13 01:01:44 UTC
svn commit: r713569 - in /lucene/java/trunk/contrib/highlighter/src:
java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
test/org/apache/lucene/search/highlight/HighlighterTest.java
Author: markrmiller
Date: Wed Nov 12 16:01:43 2008
New Revision: 713569
URL: http://svn.apache.org/viewvc?rev=713569&view=rev
Log:
LUCENE-1389: SimpleSpanFragmenter can create very short fragments
Modified:
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java?rev=713569&r1=713568&r2=713569&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java Wed Nov 12 16:01:43 2008
@@ -33,6 +33,7 @@
private int position = -1;
private SpanScorer spanScorer;
private int waitForPos = -1;
+ private int textSize;
/**
* @param spanscorer SpanScorer that was used to score hits
@@ -70,14 +71,14 @@
for (int i = 0; i < positionSpans.size(); i++) {
if (((PositionSpan) positionSpans.get(i)).start == position) {
waitForPos = ((PositionSpan) positionSpans.get(i)).end + 1;
-
- return true;
+ break;
}
}
}
- boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags);
-
+ boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags)
+ && (textSize - token.endOffset()) >= (fragmentSize >>> 1);
+
if (isNewFrag) {
currentNumFrags++;
}
@@ -89,7 +90,8 @@
* @see org.apache.lucene.search.highlight.Fragmenter#start(java.lang.String)
*/
public void start(String originalText) {
- position = 0;
+ position = -1;
currentNumFrags = 1;
+ textSize = originalText.length();
}
}
Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=713569&r1=713568&r2=713569&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Wed Nov 12 16:01:43 2008
@@ -242,6 +242,46 @@
}
}
+ public void testSimpleSpanFragmenter() throws Exception {
+ doSearching("\"piece of text that is very long\"");
+
+ int maxNumFragmentsRequired = 2;
+
+ for (int i = 0; i < hits.length(); i++) {
+ String text = hits.doc(i).get(FIELD_NAME);
+ CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
+ .tokenStream(FIELD_NAME, new StringReader(text)));
+ SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
+ Highlighter highlighter = new Highlighter(this, spanscorer);
+ highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 5));
+ tokenStream.reset();
+
+ String result = highlighter.getBestFragments(tokenStream, text,
+ maxNumFragmentsRequired, "...");
+ System.out.println("\t" + result);
+
+ }
+
+ doSearching("\"been shot\"");
+
+ maxNumFragmentsRequired = 2;
+
+ for (int i = 0; i < hits.length(); i++) {
+ String text = hits.doc(i).get(FIELD_NAME);
+ CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
+ .tokenStream(FIELD_NAME, new StringReader(text)));
+ SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
+ Highlighter highlighter = new Highlighter(this, spanscorer);
+ highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 20));
+ tokenStream.reset();
+
+ String result = highlighter.getBestFragments(tokenStream, text,
+ maxNumFragmentsRequired, "...");
+ System.out.println("\t" + result);
+
+ }
+ }
+
// position sensitive query added after position insensitive query
public void testPosTermStdTerm() throws Exception {
doSearching("y \"x y z\"");