You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mh...@apache.org on 2009/03/26 00:05:17 UTC
svn commit: r758460 - in /lucene/java/trunk: ./
contrib/highlighter/src/java/org/apache/lucene/search/highlight/
contrib/highlighter/src/test/org/apache/lucene/search/highlight/
Author: mharwood
Date: Wed Mar 25 23:05:14 2009
New Revision: 758460
URL: http://svn.apache.org/viewvc?rev=758460&view=rev
Log:
Fix for LUCENE-1500 - new exception added to Highlighter API to handle TokenStreams with Tokens that exceed given text length
Added:
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/InvalidTokenOffsetsException.java
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=758460&r1=758459&r2=758460&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Mar 25 23:05:14 2009
@@ -65,6 +65,10 @@
11. LUCENE-1561: Renamed Field.omitTf to Field.omitTermFreqAndPositions
(Otis Gospodnetic via Mike McCandless)
+
+12. LUCENE-1500: Added new InvalidTokenOffsetsException to Highlighter methods
+ to denote issues when offsets in TokenStream tokens exceed the length of the
+ provided text. (Mark Harwood)
Bug fixes
Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=758460&r1=758459&r2=758460&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java Wed Mar 25 23:05:14 2009
@@ -74,9 +74,10 @@
* @param fieldName Name of field used to influence analyzer's tokenization policy
*
* @return highlighted text fragment or null if no terms found
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
*/
public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
- throws IOException
+ throws IOException, InvalidTokenOffsetsException
{
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
return getBestFragment(tokenStream, text);
@@ -96,9 +97,10 @@
* @param text text to highlight terms in
*
* @return highlighted text fragment or null if no terms found
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
*/
public final String getBestFragment(TokenStream tokenStream, String text)
- throws IOException
+ throws IOException, InvalidTokenOffsetsException
{
String[] results = getBestFragments(tokenStream,text, 1);
if (results.length > 0)
@@ -120,12 +122,13 @@
* @deprecated This method incorrectly hardcodes the choice of fieldname. Use the
* method of the same name that takes a fieldname.
* @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
*/
public final String[] getBestFragments(
Analyzer analyzer,
String text,
int maxNumFragments)
- throws IOException
+ throws IOException, InvalidTokenOffsetsException
{
TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
return getBestFragments(tokenStream, text, maxNumFragments);
@@ -142,13 +145,14 @@
* @param maxNumFragments the maximum number of fragments.
*
* @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
*/
public final String[] getBestFragments(
Analyzer analyzer,
String fieldName,
String text,
int maxNumFragments)
- throws IOException
+ throws IOException, InvalidTokenOffsetsException
{
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
return getBestFragments(tokenStream, text, maxNumFragments);
@@ -165,12 +169,13 @@
* @param maxNumFragments the maximum number of fragments.
*
* @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
*/
public final String[] getBestFragments(
TokenStream tokenStream,
String text,
int maxNumFragments)
- throws IOException
+ throws IOException, InvalidTokenOffsetsException
{
maxNumFragments = Math.max(1, maxNumFragments); //sanity check
@@ -198,13 +203,14 @@
* @param maxNumFragments
* @param mergeContiguousFragments
* @throws IOException
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
*/
public final TextFragment[] getBestTextFragments(
TokenStream tokenStream,
String text,
boolean mergeContiguousFragments,
int maxNumFragments)
- throws IOException
+ throws IOException, InvalidTokenOffsetsException
{
ArrayList docFrags = new ArrayList();
StringBuffer newText=new StringBuffer();
@@ -230,6 +236,14 @@
(nextToken!= null)&&(nextToken.startOffset()< maxDocCharsToAnalyze);
nextToken = tokenStream.next(reusableToken))
{
+ if( (nextToken.endOffset()>text.length())
+ ||
+ (nextToken.startOffset()>text.length())
+ )
+ {
+ throw new InvalidTokenOffsetsException("Token "+nextToken.toString()
+ +" exceeds length of provided text sized "+text.length());
+ }
if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(nextToken)))
{
//the current token is distinct from previous tokens -
@@ -452,13 +466,14 @@
* @param separator the separator used to intersperse the document fragments (typically "...")
*
* @return highlighted text
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
*/
public final String getBestFragments(
TokenStream tokenStream,
String text,
int maxNumFragments,
String separator)
- throws IOException
+ throws IOException, InvalidTokenOffsetsException
{
String sections[] = getBestFragments(tokenStream,text, maxNumFragments);
StringBuffer result = new StringBuffer();
Added: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/InvalidTokenOffsetsException.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/InvalidTokenOffsetsException.java?rev=758460&view=auto
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/InvalidTokenOffsetsException.java (added)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/InvalidTokenOffsetsException.java Wed Mar 25 23:05:14 2009
@@ -0,0 +1,31 @@
+package org.apache.lucene.search.highlight;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Exception thrown if TokenStream Tokens are incompatible with provided text
+ *
+ */
+public class InvalidTokenOffsetsException extends Exception
+{
+
+ public InvalidTokenOffsetsException(String message)
+ {
+ super(message);
+ }
+
+}
Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=758460&r1=758459&r2=758460&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Wed Mar 25 23:05:14 2009
@@ -136,9 +136,10 @@
/**
* This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
+ * @throws InvalidTokenOffsetsException
*/
private static String highlightField(Query query, String fieldName, String text)
- throws IOException {
+ throws IOException, InvalidTokenOffsetsException {
CachingTokenFilter tokenStream = new CachingTokenFilter(new StandardAnalyzer().tokenStream(
fieldName, new StringReader(text)));
// Assuming "<B>", "</B>" used to highlight
@@ -1291,7 +1292,7 @@
private Directory dir = new RAMDirectory();
private Analyzer a = new WhitespaceAnalyzer();
- public void testWeightedTermsWithDeletes() throws IOException, ParseException {
+ public void testWeightedTermsWithDeletes() throws IOException, ParseException, InvalidTokenOffsetsException {
makeIndex();
deleteDocument();
searchIndex();
@@ -1321,7 +1322,7 @@
writer.close();
}
- private void searchIndex() throws IOException, ParseException {
+ private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
String q = "t_text1:random";
QueryParser parser = new QueryParser( "t_text1", a );
Query query = parser.parse( q );