You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by dn...@apache.org on 2005/02/19 20:08:53 UTC
svn commit: r154444 - in lucene/java/trunk/contrib/highlighter/src:
java/org/apache/lucene/search/highlight/Highlighter.java
test/org/apache/lucene/search/highlight/HighlighterTest.java
Author: dnaber
Date: Sat Feb 19 11:08:52 2005
New Revision: 154444
URL: http://svn.apache.org/viewcvs?view=rev&rev=154444
Log:
offer additional methods that take analyzer + text instead of tokenstream; fix some unused imports and variables
Modified:
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?view=diff&r1=154443&r2=154444
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java Sat Feb 19 11:08:52 2005
@@ -16,9 +16,11 @@
*/
import java.io.IOException;
+import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.PriorityQueue;
@@ -57,8 +59,24 @@
this.fragmentScorer = fragmentScorer;
}
-
-
+ /**
+ * Highlights chosen terms in a text, extracting the most relevant section.
+ * Convenience method: tokenizes <code>text</code> with <code>analyzer</code> under the
+ * fixed field name "field" and calls {@link #getBestFragment(TokenStream, String)}.
+ *
+ * @param analyzer the analyzer that will be used to split <code>text</code>
+ * into chunks
+ * @param text text to highlight terms in
+ * @return highlighted text fragment or null if no terms found
+ * @throws IOException presumably propagated from the TokenStream-based overload
+ */
+ public final String getBestFragment(Analyzer analyzer, String text)
+ throws IOException
+ {
+ TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
+ return getBestFragment(tokenStream, text);
+ }
+
/**
* Highlights chosen terms in a text, extracting the most relevant section.
* The document text is analysed in chunks to record hit statistics
@@ -84,6 +102,29 @@
}
return null;
}
+
+ /**
+ * Highlights chosen terms in a text, extracting the most relevant sections.
+ * Convenience method: tokenizes <code>text</code> with <code>analyzer</code> under the
+ * fixed field name "field" and calls {@link #getBestFragments(TokenStream, String, int)}.
+ *
+ * @param analyzer the analyzer that will be used to split <code>text</code>
+ * into chunks
+ * @param text text to highlight terms in
+ * @param maxNumFragments the maximum number of fragments.
+ * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
+ * @throws IOException presumably propagated from the TokenStream-based overload
+ */
+ public final String[] getBestFragments(
+ Analyzer analyzer,
+ String text,
+ int maxNumFragments)
+ throws IOException
+ {
+ TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
+ return getBestFragments(tokenStream, text, maxNumFragments);
+ }
+
/**
* Highlights chosen terms in a text, extracting the most relevant sections.
* The document text is analysed in chunks to record hit statistics
Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?view=diff&r1=154443&r2=154444
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Sat Feb 19 11:08:52 2005
@@ -17,7 +17,6 @@
*/
import java.io.ByteArrayInputStream;
-import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
@@ -27,7 +26,6 @@
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
import junit.framework.TestCase;
@@ -50,7 +48,6 @@
import org.apache.lucene.store.RAMDirectory;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
-import org.xml.sax.SAXException;
/**
* JUnit Test for Highlighter class.
@@ -157,7 +154,6 @@
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
}
-
public void testGetBestSingleFragment() throws Exception
{
doSearching("Kennedy");
@@ -172,6 +168,23 @@
System.out.println("\t" + result);
}
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
+
+ numHighlights = 0;
+ for (int i = 0; i < hits.length(); i++)
+ {
+ String text = hits.doc(i).get(FIELD_NAME);
+ highlighter.getBestFragment(analyzer, text);
+ }
+ assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
+
+ numHighlights = 0;
+ for (int i = 0; i < hits.length(); i++)
+ {
+ String text = hits.doc(i).get(FIELD_NAME);
+ highlighter.getBestFragments(analyzer, text, 10);
+ }
+ assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
+
}
public void testGetBestSingleFragmentWithWeights() throws Exception
@@ -278,7 +291,7 @@
TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(texts[0]));
String result = highlighter.getBestFragment(tokenStream,texts[0]);
assertTrue("Setting MaxDocBytesToAnalyze should have prevented " +
- "us from finding matches for this record" + numHighlights +
+ "us from finding matches for this record: " + numHighlights +
" found", numHighlights == 0);
}
@@ -322,7 +335,6 @@
Highlighter highlighter =
new Highlighter(this,new QueryScorer(query));
- int highlightFragmentSizeInBytes = 40;
for (int i = 0; i < texts.length; i++)
{
String text = texts[i];
@@ -568,8 +580,8 @@
//========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
//========== MADE MORE GENERALLY USEFUL.
// TODO - make synonyms all interchangeable with each other and produce
-// a version that does antonyms(?) - the "is a specialised type of ...."
-// so that car=audi, bmw and volkswagen but bmw != audi so different
+// a version that does hyponyms - the "is a specialised type of ...."
+// so that car = audi, bmw and volkswagen but bmw != audi so different
// behaviour to synonyms
//===================================================================
@@ -587,7 +599,6 @@
*/
public TokenStream tokenStream(String arg0, Reader arg1)
{
-
return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org