You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-user@lucene.apache.org by Jason Calabrese <ma...@jasoncalabrese.com> on 2004/08/11 20:12:02 UTC
Highlighter: Scored Fragments

Mark/All,

I'm using your highlighter and have been very happy with it.  You have saved 
me a ton of time.

In my particular use I need to get a few fragments from several different 
sources and then select the best fragment(s) to use.  To do this I needed 
access to the fragment scores, so I made a couple of changes to your code.

What do you think about exposing the score of the fragments?  

I've included my changes as a patch against the current cvs version.  My 
implementation is pretty basic, but I'm interested to see what you think.

Thanks,

Jason

Index: 
contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
===================================================================
RCS 
file: /home/cvspublic/jakarta-lucene-sandbox/contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java,v
retrieving revision 1.2
diff -c -r1.2 Highlighter.java
*** 
contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java	
26 Jul 2004 20:39:47 -0000	1.2
--- 
contributions/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java	
11 Aug 2004 17:01:37 -0000
***************
*** 118,123 ****
--- 118,179 ----
  		return (String[]) fragTexts.toArray(new String[0]);
  	}
  
+     public final ScoredFragment[] getScoredFragments(
+             TokenStream tokenStream,    
+             String text,
+             int maxNumFragments)
+             throws IOException
+     {
+         maxNumFragments = Math.max(1, maxNumFragments); //sanity check
+         StringBuffer newText = new StringBuffer();
+         
+         TextFragment[] frag =getBestDocFragments(tokenStream,text, newText, maxNumFragments);
+ 
+         mergeContiguousFragments(frag);
+ 
+         //Get text
+         ArrayList scoredfrags = new ArrayList();
+         int n = 0;
+         for (int i = 0; i < frag.length; i++)
+         {
+             if ((frag[i] != null) && (frag[i].getScore() > 0))
+             {
+                 String fragment = newText.substring(
+                         frag[i].textStartPos,
+                         frag[i].textEndPos);
+                 scoredfrags.add(new ScoredFragment(fragment, frag[i].getScore()));
+             }
+         }
+         return (ScoredFragment[]) scoredfrags.toArray(new ScoredFragment[0]);
+     }
+ 
+     public final ScoredFragment getScoredFragments(
+             TokenStream tokenStream,    
+             String text,
+             int maxNumFragments,
+             String separator)
+             throws IOException
+             
+     {
+         ScoredFragment[] frags = getScoredFragments(tokenStream, text, maxNumFragments);
+         
+         StringBuffer result = new StringBuffer();
+         float totalScore = 0;
+         for (int i = 0; i < frags.length; i++)
+         {
+             if (i > 0)
+             {
+                 result.append(separator);
+             }
+             result.append(frags[i].getFragment());
+             totalScore += frags[i].getScore();
+         }
+         
+         return new ScoredFragment(result.toString(), totalScore);
+         
+         
+     }
+     
  	/**
  	 * Low level api to get the most relevant sections of the document
  	 * @param tokenStream
Index: 
contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java
===================================================================
RCS file: 
contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java
diff -N 
contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- 
contributions/highlighter/src/java/org/apache/lucene/search/highlight/ScoredFragment.java	
1 Jan 1970 00:00:00 -0000
***************
*** 0 ****
--- 1,25 ----
+ package org.apache.lucene.search.highlight;
+ 
+ public final class ScoredFragment
+ {
+ 
+     private String fragment;
+     private float score;
+     
+     public ScoredFragment(String fragment, float score) 
+     {
+         this.fragment = fragment;
+         this.score = score;
+     }
+     
+     public String getFragment() 
+     {
+         return fragment;
+     }
+     
+     public float getScore()
+     {
+         return score;
+     }
+ 
+ }

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-user-help@jakarta.apache.org