You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-user@lucene.apache.org by Jason Calabrese <ma...@jasoncalabrese.com> on 2004/08/11 20:12:02 UTC
Highlighter: Scored Fragments
Mark/All,
I'm using your highlighter and have been very happy with it. You have saved
me a ton of time.
In my particular use case I need to get a few fragments from several different
sources and then select the best fragment(s) to use. To do this I needed
access to the fragment scores, so I made a couple of changes to your code.
What do you think about exposing the score of the fragments?
I've included my changes as a patch against the current CVS version. My
implementation is pretty basic, but I'm interested to see what you think.
Thanks,
Jason
Index:
contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
===================================================================
RCS
file: /home/cvspublic/jakarta-lucene-sandbox/contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java,v
retrieving revision 1.2
diff -c -r1.2 Highlighter.java
***
contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
26 Jul 2004 20:39:47 -0000 1.2
---
contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
11 Aug 2004 17:01:37 -0000
***************
*** 118,123 ****
--- 118,179 ----
return (String[]) fragTexts.toArray(new String[0]);
}
+ public final ScoredFragment[] getScoredFragments(
+ TokenStream tokenStream,
+ String text,
+ int maxNumFragments)
+ throws IOException
+ {
+ maxNumFragments = Math.max(1, maxNumFragments); //sanity check
+ StringBuffer newText = new StringBuffer();
+
+ TextFragment[] frag =getBestDocFragments(tokenStream,text, newText, maxNumFragments);
+
+ mergeContiguousFragments(frag);
+
+ //Get text
+ ArrayList scoredfrags = new ArrayList();
+ int n = 0;
+ for (int i = 0; i < frag.length; i++)
+ {
+ if ((frag[i] != null) && (frag[i].getScore() > 0))
+ {
+ String fragment = newText.substring(
+ frag[i].textStartPos,
+ frag[i].textEndPos);
+ scoredfrags.add(new ScoredFragment(fragment, frag[i].getScore()));
+ }
+ }
+ return (ScoredFragment[]) scoredfrags.toArray(new ScoredFragment[0]);
+ }
+
+ public final ScoredFragment getScoredFragments(
+ TokenStream tokenStream,
+ String text,
+ int maxNumFragments,
+ String separator)
+ throws IOException
+
+ {
+ ScoredFragment[] frags = getScoredFragments(tokenStream, text, maxNumFragments);
+
+ StringBuffer result = new StringBuffer();
+ float totalScore = 0;
+ for (int i = 0; i < frags.length; i++)
+ {
+ if (i > 0)
+ {
+ result.append(separator);
+ }
+ result.append(frags[i].getFragment());
+ totalScore += frags[i].getScore();
+ }
+
+ return new ScoredFragment(result.toString(), totalScore);
+
+
+ }
+
/**
* Low level api to get the most relevant sections of the document
* @param tokenStream
Index:
contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java
===================================================================
RCS file:
contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java
diff -N
contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java
*** /dev/null 1 Jan 1970 00:00:00 -0000
---
contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java
1 Jan 1970 00:00:00 -0000
***************
*** 0 ****
--- 1,25 ----
+ package org.apache.lucene.search.highlight;
+
+ public final class ScoredFragment
+ {
+
+ private String fragment;
+ private float score;
+
+ public ScoredFragment(String fragment, float score)
+ {
+ this.fragment = fragment;
+ this.score = score;
+ }
+
+ public String getFragment()
+ {
+ return fragment;
+ }
+
+ public float getScore()
+ {
+ return score;
+ }
+
+ }
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-user-help@jakarta.apache.org