You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-dev@lucene.apache.org by Erik Hatcher <er...@ehatchersolutions.com> on 2007/02/22 03:29:05 UTC
Re: svn commit: r510338 - in /lucene/solr/trunk/src/java/org/apache/solr: request/SolrParams.java util/HighlightingUtils.java util/SolrPluginUtils.java
Mike,
Your commit causes build failures. Run "ant clean example" to reproduce.
Erik
compile:
[mkdir] Created dir: /Users/erik/dev/solr/build
[javac] Compiling 171 source files to /Users/erik/dev/solr/build
[javac] /Users/erik/dev/solr/src/java/org/apache/solr/util/
CommonParams.java:144: cannot find symbol
[javac] symbol : variable HIGHLIGHT
[javac] location: class org.apache.solr.request.SolrParams
[javac] tmp = args.get(SolrParams.HIGHLIGHT);
[javac] ^
[javac] /Users/erik/dev/solr/src/java/org/apache/solr/util/
CommonParams.java:151: cannot find symbol
[javac] symbol : variable HIGHLIGHT
[javac] location: class org.apache.solr.request.SolrParams
[javac] SolrCore.log.severe("init param is not a str: "
+ SolrParams.HIGHLIGHT);
[javac]
^
[javac] /Users/erik/dev/solr/src/java/org/apache/solr/util/
CommonParams.java:155: cannot find symbol
[javac] symbol : variable HIGHLIGHT_FIELDS
[javac] location: class org.apache.solr.request.SolrParams
[javac] tmp = args.get(SolrParams.HIGHLIGHT_FIELDS);
[javac] ^
[javac] /Users/erik/dev/solr/src/java/org/apache/solr/util/
CommonParams.java:160: cannot find symbol
[javac] symbol : variable HIGHLIGHT
[javac] location: class org.apache.solr.request.SolrParams
[javac] SolrCore.log.severe("init param is not a str: "
+ SolrParams.HIGHLIGHT);
[javac]
^
[javac] /Users/erik/dev/solr/src/java/org/apache/solr/util/
CommonParams.java:164: cannot find symbol
[javac] symbol : variable MAX_SNIPPETS
[javac] location: class org.apache.solr.request.SolrParams
[javac] tmp = args.get(SolrParams.MAX_SNIPPETS);
[javac] ^
[javac] /Users/erik/dev/solr/src/java/org/apache/solr/util/
CommonParams.java:169: cannot find symbol
[javac] symbol : variable MAX_SNIPPETS
[javac] location: class org.apache.solr.request.SolrParams
[javac] SolrCore.log.severe("init param is not an int: "
+ SolrParams.MAX_SNIPPETS);
[javac]
^
[javac] /Users/erik/dev/solr/src/java/org/apache/solr/util/
CommonParams.java:173: cannot find symbol
[javac] symbol : variable HIGHLIGHT_FORMATTER_CLASS
[javac] location: class org.apache.solr.request.SolrParams
[javac] tmp = args.get(SolrParams.HIGHLIGHT_FORMATTER_CLASS);
[javac] ^
[javac] /Users/erik/dev/solr/src/java/org/apache/solr/util/
CommonParams.java:178: cannot find symbol
[javac] symbol : variable HIGHLIGHT_FORMATTER_CLASS
[javac] location: class org.apache.solr.request.SolrParams
[javac] SolrCore.log.severe("init param is not a str: "
+ SolrParams.HIGHLIGHT_FORMATTER_CLASS);
[javac]
^
[javac] Note: Some input files use or override a deprecated API.
[javac] Note: Recompile with -Xlint:deprecation for details.
[javac] Note: Some input files use unchecked or unsafe operations.
[javac] Note: Recompile with -Xlint:unchecked for details.
[javac] 8 errors
On Feb 21, 2007, at 8:38 PM, klaas@apache.org wrote:
> Author: klaas
> Date: Wed Feb 21 17:38:47 2007
> New Revision: 510338
>
> URL: http://svn.apache.org/viewvc?view=rev&rev=510338
> Log:
> - moved highlighting-specific classes to HighlightingUtils.java
> - brace consistency in HU.java
> - removed unused final static in SolrParams (should highlighting
> param defs be moved here?)
>
> Modified:
> lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java
> lucene/solr/trunk/src/java/org/apache/solr/util/
> HighlightingUtils.java
> lucene/solr/trunk/src/java/org/apache/solr/util/
> SolrPluginUtils.java
>
> Modified: lucene/solr/trunk/src/java/org/apache/solr/request/
> SolrParams.java
> URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/
> apache/solr/request/SolrParams.java?
> view=diff&rev=510338&r1=510337&r2=510338
> ======================================================================
> ========
> --- lucene/solr/trunk/src/java/org/apache/solr/request/
> SolrParams.java (original)
> +++ lucene/solr/trunk/src/java/org/apache/solr/request/
> SolrParams.java Wed Feb 21 17:38:47 2007
> @@ -60,14 +60,6 @@
> public static final String DEBUG_QUERY = "debugQuery";
> /** another query to explain against */
> public static final String EXPLAIN_OTHER = "explainOther";
> - /** wether to highlight */
> - public static final String HIGHLIGHT = "highlight";
> - /** fields to highlight */
> - public static final String HIGHLIGHT_FIELDS = "highlightFields";
> - /** maximum highlight fragments to return */
> - public static final String MAX_SNIPPETS = "maxSnippets";
> - /** override default highlight Formatter class */
> - public static final String HIGHLIGHT_FORMATTER_CLASS =
> "highlightFormatterClass";
>
> /**
> * Should facet counts be calculated?
>
> Modified: lucene/solr/trunk/src/java/org/apache/solr/util/
> HighlightingUtils.java
> URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/
> apache/solr/util/HighlightingUtils.java?
> view=diff&rev=510338&r1=510337&r2=510338
> ======================================================================
> ========
> --- lucene/solr/trunk/src/java/org/apache/solr/util/
> HighlightingUtils.java (original)
> +++ lucene/solr/trunk/src/java/org/apache/solr/util/
> HighlightingUtils.java Wed Feb 21 17:38:47 2007
> @@ -18,11 +18,14 @@
>
> import java.io.IOException;
> import java.io.StringReader;
> -import java.util.ArrayList;
> import java.util.HashMap;
> import java.util.HashSet;
> import java.util.Map;
> import java.util.Set;
> +import java.util.List;
> +import java.util.LinkedList;
> +import java.util.ArrayList;
> +import java.util.ListIterator;
>
> import org.apache.solr.request.*;
> import org.apache.solr.search.DocIterator;
> @@ -30,7 +33,7 @@
> import org.apache.solr.search.SolrIndexSearcher;
> import org.apache.solr.schema.SchemaField;
>
> -import org.apache.lucene.analysis.TokenStream;
> +import org.apache.lucene.analysis.*;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.search.Query;
> import org.apache.lucene.search.highlight.*;
> @@ -38,8 +41,7 @@
> /**
> * Collection of Utility and Factory methods for Highlighting.
> */
> -public class HighlightingUtils
> -{
> +public class HighlightingUtils {
> private static final String SIMPLE = "simple";
>
> private static final String HIGHLIGHT = "hl";
> @@ -53,8 +55,7 @@
> private static final String FIELD_MATCH = PREFIX
> +"requireFieldMatch";
>
> private static SolrParams DEFAULTS = null;
> - static
> - {
> + static {
> Map<String,String> map = new HashMap<String,String>();
> map.put(SNIPPETS, "1");
> map.put(FRAGSIZE, "100");
> @@ -66,8 +67,7 @@
> }
>
> /** Combine request parameters with highlighting defaults. */
> - private static SolrParams getParams(SolrQueryRequest request)
> - {
> + private static SolrParams getParams(SolrQueryRequest request) {
> return new DefaultSolrParams(request.getParams(), DEFAULTS);
> }
>
> @@ -76,8 +76,7 @@
> * @param request The current SolrQueryRequest
> * @return <code>true</code> if highlighting enabled,
> <code>false</code> if not.
> */
> - public static boolean isHighlightingEnabled(SolrQueryRequest
> request)
> - {
> + public static boolean isHighlightingEnabled(SolrQueryRequest
> request) {
> return getParams(request).getBool(HIGHLIGHT, false);
> }
>
> @@ -87,8 +86,7 @@
> * @param fieldName The name of the field
> * @param request The current SolrQueryRequest
> */
> - public static Highlighter getHighlighter(Query query, String
> fieldName, SolrQueryRequest request)
> - {
> + public static Highlighter getHighlighter(Query query, String
> fieldName, SolrQueryRequest request) {
> Highlighter highlighter = new Highlighter(
> getFormatter(fieldName, request),
> getQueryScorer(query, fieldName, request));
> @@ -102,15 +100,12 @@
> * @param fieldName The name of the field
> * @param request The SolrQueryRequest
> */
> - public static QueryScorer getQueryScorer(Query query, String
> fieldName, SolrQueryRequest request)
> - {
> + public static QueryScorer getQueryScorer(Query query, String
> fieldName, SolrQueryRequest request) {
> boolean reqFieldMatch = getParams(request).getFieldBool
> (fieldName, FIELD_MATCH, false);
> - if (reqFieldMatch)
> - {
> + if (reqFieldMatch) {
> return new QueryScorer(query, request.getSearcher
> ().getReader(), fieldName);
> }
> - else
> - {
> + else {
> return new QueryScorer(query);
> }
> }
> @@ -123,25 +118,20 @@
> * @param request The current SolrQueryRequest
> * @param defaultFields Programmatic default highlight fields,
> used if nothing is specified in the handler config or the request.
> */
> - public static String[] getHighlightFields(Query query,
> SolrQueryRequest request, String[] defaultFields)
> - {
> + public static String[] getHighlightFields(Query query,
> SolrQueryRequest request, String[] defaultFields) {
> String fields[] = getParams(request).getParams(FIELDS);
>
> // if no fields specified in the request, or the handler,
> fall back to programmatic default, or default search field.
> - if(emptyArray(fields))
> - {
> + if(emptyArray(fields)) {
> // use default search field if highlight fieldlist not
> specified.
> - if (emptyArray(defaultFields))
> - {
> + if (emptyArray(defaultFields)) {
> fields = new String[]{request.getSchema
> ().getDefaultSearchFieldName()};
> }
> - else
> - {
> + else {
> fields = defaultFields;
> }
> }
> - else if (fields.length == 1)
> - {
> + else if (fields.length == 1) {
> // if there's a single request/handler value, it may be a
> space/comma separated list
> fields = SolrPluginUtils.split(fields[0]);
> }
> @@ -149,8 +139,7 @@
> return fields;
> }
>
> - private static boolean emptyArray(String[] arr)
> - {
> + private static boolean emptyArray(String[] arr) {
> return (arr == null || arr.length == 0 || arr[0] == null ||
> arr[0].trim().length() == 0);
> }
>
> @@ -161,8 +150,7 @@
> * @param fieldName The name of the field
> * @param request The current SolrQueryRequest
> */
> - public static int getMaxSnippets(String fieldName,
> SolrQueryRequest request)
> - {
> + public static int getMaxSnippets(String fieldName,
> SolrQueryRequest request) {
> return Integer.parseInt(getParams(request).getFieldParam
> (fieldName, SNIPPETS));
> }
>
> @@ -175,8 +163,7 @@
> * @param request The current SolrQueryRequest
> * @return An appropriate Formatter.
> */
> - public static Formatter getFormatter(String fieldName,
> SolrQueryRequest request)
> - {
> + public static Formatter getFormatter(String fieldName,
> SolrQueryRequest request) {
> SolrParams p = getParams(request);
>
> // SimpleHTMLFormatter is the only supported Formatter at
> the moment
> @@ -192,8 +179,7 @@
> * @param request The current SolrQueryRequest
> * @return An appropriate Fragmenter.
> */
> - public static Fragmenter getFragmenter(String fieldName,
> SolrQueryRequest request)
> - {
> + public static Fragmenter getFragmenter(String fieldName,
> SolrQueryRequest request) {
> int fragsize = Integer.parseInt(getParams
> (request).getFieldParam(fieldName, FRAGSIZE));
> return (fragsize <= 0) ? new NullFragmenter() : new
> GapFragmenter(fragsize);
> }
> @@ -210,8 +196,7 @@
> * @return NamedList containing a NamedList for each document,
> which in
> * turns contains sets (field, summary) pairs.
> */
> - public static NamedList doHighlighting(DocList docs, Query
> query, SolrQueryRequest req, String[] defaultFields) throws
> IOException
> - {
> + public static NamedList doHighlighting(DocList docs, Query
> query, SolrQueryRequest req, String[] defaultFields) throws
> IOException {
> if (!isHighlightingEnabled(req))
> return null;
>
> @@ -232,13 +217,11 @@
>
> // Highlight each document
> DocIterator iterator = docs.iterator();
> - for (int i = 0; i < docs.size(); i++)
> - {
> + for (int i = 0; i < docs.size(); i++) {
> int docId = iterator.nextDoc();
> Document doc = readDocs[i];
> NamedList docSummaries = new SimpleOrderedMap();
> - for (String fieldName : fieldNames)
> - {
> + for (String fieldName : fieldNames) {
> fieldName = fieldName.trim();
> String[] docTexts = doc.getValues(fieldName);
> if (docTexts == null) continue;
> @@ -249,24 +232,20 @@
>
> String[] summaries;
> TextFragment[] frag;
> - if (docTexts.length == 1)
> - {
> + if (docTexts.length == 1) {
> // single-valued field
> TokenStream tstream;
> - try
> - {
> + try {
> // attempt term vectors
> tstream = TokenSources.getTokenStream
> (searcher.getReader(), docId, fieldName);
> }
> - catch (IllegalArgumentException e)
> - {
> + catch (IllegalArgumentException e) {
> // fall back to analyzer
> tstream = new TokenOrderingFilter
> (searcher.getSchema().getAnalyzer().tokenStream(fieldName, new
> StringReader(docTexts[0])), 10);
> }
> frag = highlighter.getBestTextFragments(tstream,
> docTexts[0], false, numFragments);
> }
> - else
> - {
> + else {
> // multi-valued field
> MultiValueTokenStream tstream;
> tstream = new MultiValueTokenStream(fieldName,
> docTexts, searcher.getSchema().getAnalyzer(), true);
> @@ -274,18 +253,16 @@
> }
> // convert fragments back into text
> // TODO: we can include score and position information
> in output as snippet attributes
> - if (frag.length > 0)
> - {
> + if (frag.length > 0) {
> ArrayList<String> fragTexts = new ArrayList<String>();
> - for (int j = 0; j < frag.length; j++)
> - {
> - if ((frag[j] != null) && (frag[j].getScore() > 0))
> - {
> + for (int j = 0; j < frag.length; j++) {
> + if ((frag[j] != null) && (frag[j].getScore() >
> 0)) {
> fragTexts.add(frag[j].toString());
> }
> }
> summaries = fragTexts.toArray(new String[0]);
> - if (summaries.length > 0) docSummaries.add
> (fieldName, summaries);
> + if (summaries.length > 0)
> + docSummaries.add(fieldName, summaries);
> }
> }
> String printId = searcher.getSchema().printableUniqueKey
> (doc);
> @@ -293,4 +270,161 @@
> }
> return fragments;
> }
> +}
> +
> +/**
> + * Helper class which creates a single TokenStream out of values
> from a
> + * multi-valued field.
> + */
> +class MultiValueTokenStream extends TokenStream {
> + private String fieldName;
> + private String[] values;
> + private Analyzer analyzer;
> + private int curIndex; // next index into the
> values array
> + private int curOffset; // offset into
> concatenated string
> + private TokenStream currentStream; // tokenStream currently
> being iterated
> + private boolean orderTokenOffsets;
> +
> + /** Constructs a TokenStream for consecutively-analyzed field
> values
> + *
> + * @param fieldName name of the field
> + * @param values array of field data
> + * @param analyzer analyzer instance
> + */
> + public MultiValueTokenStream(String fieldName, String[] values,
> + Analyzer analyzer, boolean
> orderTokenOffsets) {
> + this.fieldName = fieldName;
> + this.values = values;
> + this.analyzer = analyzer;
> + curIndex = -1;
> + curOffset = 0;
> + currentStream = null;
> + this.orderTokenOffsets=orderTokenOffsets;
> + }
> +
> + /** Returns the next token in the stream, or null at EOS. */
> + public Token next() throws IOException {
> + int extra = 0;
> + if(currentStream == null) {
> + curIndex++;
> + if(curIndex < values.length) {
> + currentStream = analyzer.tokenStream(fieldName,
> + new StringReader
> (values[curIndex]));
> + if (orderTokenOffsets) currentStream = new
> TokenOrderingFilter(currentStream,10);
> + // add extra space between multiple values
> + if(curIndex > 0)
> + extra = analyzer.getPositionIncrementGap(fieldName);
> + } else {
> + return null;
> + }
> + }
> + Token nextToken = currentStream.next();
> + if(nextToken == null) {
> + curOffset += values[curIndex].length();
> + currentStream = null;
> + return next();
> + }
> + // create an modified token which is the offset into the
> concatenated
> + // string of all values
> + Token offsetToken = new Token(nextToken.termText(),
> + nextToken.startOffset() +
> curOffset,
> + nextToken.endOffset() + curOffset);
> + offsetToken.setPositionIncrement(nextToken.getPositionIncrement
> () + extra*10);
> + return offsetToken;
> + }
> +
> + /**
> + * Returns all values as a single String into which the Tokens
> index with
> + * their offsets.
> + */
> + public String asSingleValue() {
> + StringBuilder sb = new StringBuilder();
> + for(String str : values)
> + sb.append(str);
> + return sb.toString();
> + }
> +
> +}
> +
> +/**
> + * A simple modification of SimpleFragmenter which additionally
> creates new
> + * fragments when an unusually-large position increment is
> encountered
> + * (this behaves much better in the presence of multi-valued fields).
> + */
> +class GapFragmenter extends SimpleFragmenter {
> + /**
> + * When a gap in term positions is observed that is at least
> this big, treat
> + * the gap as a fragment delimiter.
> + */
> + public static final int INCREMENT_THRESHOLD = 50;
> + protected int fragOffsetAccum = 0;
> +
> + public GapFragmenter() {
> + }
> +
> + public GapFragmenter(int fragsize) {
> + super(fragsize);
> + }
> +
> + /* (non-Javadoc)
> + * @see org.apache.lucene.search.highlight.TextFragmenter#start
> (java.lang.String)
> + */
> + public void start(String originalText) {
> + fragOffsetAccum = 0;
> + }
> +
> + /* (non-Javadoc)
> + * @see
> org.apache.lucene.search.highlight.TextFragmenter#isNewFragment
> (org.apache.lucene.analysis.Token)
> + */
> + public boolean isNewFragment(Token token) {
> + boolean isNewFrag =
> + token.endOffset() >= fragOffsetAccum + getFragmentSize() ||
> + token.getPositionIncrement() > INCREMENT_THRESHOLD;
> + if(isNewFrag) {
> + fragOffsetAccum += token.endOffset() - fragOffsetAccum;
> + }
> + return isNewFrag;
> + }
> +}
> +
> +/** Orders Tokens in a window first by their startOffset ascending.
> + * endOffset is currently ignored.
> + * This is meant to work around fickleness in the highlighter
> only. It
> + * can mess up token positions and should not be used for indexing
> or querying.
> + */
> +class TokenOrderingFilter extends TokenFilter {
> + private final int windowSize;
> + private final LinkedList<Token> queue = new LinkedList<Token>();
> + private boolean done=false;
> +
> + protected TokenOrderingFilter(TokenStream input, int windowSize) {
> + super(input);
> + this.windowSize = windowSize;
> + }
> +
> + public Token next() throws IOException {
> + while (!done && queue.size() < windowSize) {
> + Token newTok = input.next();
> + if (newTok==null) {
> + done=true;
> + break;
> + }
> +
> + // reverse iterating for better efficiency since we know the
> + // list is already sorted, and most token start offsets will
> be too.
> + ListIterator<Token> iter = queue.listIterator(queue.size());
> + while(iter.hasPrevious()) {
> + if (newTok.startOffset() >= iter.previous().startOffset()) {
> + // insertion will be before what next() would return (what
> + // we just compared against), so move back one so the
> insertion
> + // will be after.
> + iter.next();
> + break;
> + }
> + }
> + iter.add(newTok);
> + }
> +
> + return queue.isEmpty() ? null : queue.removeFirst();
> + }
> }
>
> Modified: lucene/solr/trunk/src/java/org/apache/solr/util/
> SolrPluginUtils.java
> URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/
> apache/solr/util/SolrPluginUtils.java?
> view=diff&rev=510338&r1=510337&r2=510338
> ======================================================================
> ========
> --- lucene/solr/trunk/src/java/org/apache/solr/util/
> SolrPluginUtils.java (original)
> +++ lucene/solr/trunk/src/java/org/apache/solr/util/
> SolrPluginUtils.java Wed Feb 21 17:38:47 2007
> @@ -18,15 +18,11 @@
> package org.apache.solr.util;
>
> import org.apache.lucene.analysis.Analyzer;
> -import org.apache.lucene.analysis.Token;
> -import org.apache.lucene.analysis.TokenFilter;
> -import org.apache.lucene.analysis.TokenStream;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.queryParser.ParseException;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.*;
> import org.apache.lucene.search.BooleanClause.Occur;
> -import org.apache.lucene.search.highlight.*;
> import org.apache.solr.core.SolrCore;
> import org.apache.solr.core.SolrException;
> import org.apache.solr.request.SolrParams;
> @@ -849,156 +845,5 @@
>
> }
>
> -/**
> - * Helper class which creates a single TokenStream out of values
> from a
> - * multi-valued field.
> - */
> -class MultiValueTokenStream extends TokenStream {
> - private String fieldName;
> - private String[] values;
> - private Analyzer analyzer;
> - private int curIndex; // next index into the
> values array
> - private int curOffset; // offset into
> concatenated string
> - private TokenStream currentStream; // tokenStream currently
> being iterated
> - private boolean orderTokenOffsets;
> -
> - /** Constructs a TokenStream for consecutively-analyzed field
> values
> - *
> - * @param fieldName name of the field
> - * @param values array of field data
> - * @param analyzer analyzer instance
> - */
> - public MultiValueTokenStream(String fieldName, String[] values,
> - Analyzer analyzer, boolean
> orderTokenOffsets) {
> - this.fieldName = fieldName;
> - this.values = values;
> - this.analyzer = analyzer;
> - curIndex = -1;
> - curOffset = 0;
> - currentStream = null;
> - this.orderTokenOffsets=orderTokenOffsets;
> - }
> -
> - /** Returns the next token in the stream, or null at EOS. */
> - public Token next() throws IOException {
> - int extra = 0;
> - if(currentStream == null) {
> - curIndex++;
> - if(curIndex < values.length) {
> - currentStream = analyzer.tokenStream(fieldName,
> - new StringReader
> (values[curIndex]));
> - if (orderTokenOffsets) currentStream = new
> TokenOrderingFilter(currentStream,10);
> - // add extra space between multiple values
> - if(curIndex > 0)
> - extra = analyzer.getPositionIncrementGap(fieldName);
> - } else {
> - return null;
> - }
> - }
> - Token nextToken = currentStream.next();
> - if(nextToken == null) {
> - curOffset += values[curIndex].length();
> - currentStream = null;
> - return next();
> - }
> - // create an modified token which is the offset into the
> concatenated
> - // string of all values
> - Token offsetToken = new Token(nextToken.termText(),
> - nextToken.startOffset() +
> curOffset,
> - nextToken.endOffset() + curOffset);
> - offsetToken.setPositionIncrement(nextToken.getPositionIncrement
> () + extra*10);
> - return offsetToken;
> - }
> -
> - /**
> - * Returns all values as a single String into which the Tokens
> index with
> - * their offsets.
> - */
> - public String asSingleValue() {
> - StringBuilder sb = new StringBuilder();
> - for(String str : values)
> - sb.append(str);
> - return sb.toString();
> - }
> -
> -}
> -
> -/**
> - * A simple modification of SimpleFragmenter which additionally
> creates new
> - * fragments when an unusually-large position increment is
> encountered
> - * (this behaves much better in the presence of multi-valued fields).
> - */
> -class GapFragmenter extends SimpleFragmenter {
> - public static final int INCREMENT_THRESHOLD = 50;
> - protected int fragOffsetAccum = 0;
> -
> - public GapFragmenter() {
> - }
> -
> - public GapFragmenter(int fragsize) {
> - super(fragsize);
> - }
> -
> - /* (non-Javadoc)
> - * @see org.apache.lucene.search.highlight.TextFragmenter#start
> (java.lang.String)
> - */
> - public void start(String originalText) {
> - fragOffsetAccum = 0;
> - }
>
> - /* (non-Javadoc)
> - * @see
> org.apache.lucene.search.highlight.TextFragmenter#isNewFragment
> (org.apache.lucene.analysis.Token)
> - */
> - public boolean isNewFragment(Token token) {
> - boolean isNewFrag =
> - token.endOffset() >= fragOffsetAccum + getFragmentSize() ||
> - token.getPositionIncrement() > INCREMENT_THRESHOLD;
> - if(isNewFrag) {
> - fragOffsetAccum += token.endOffset() - fragOffsetAccum;
> - }
> - return isNewFrag;
> - }
> -}
>
> -
> -/** Orders Tokens in a window first by their startOffset ascending.
> - * endOffset is currently ignored.
> - * This is meant to work around fickleness in the highlighter
> only. It
> - * can mess up token positions and should not be used for indexing
> or querying.
> - */
> -class TokenOrderingFilter extends TokenFilter {
> - private final int windowSize;
> - private final LinkedList<Token> queue = new LinkedList<Token>();
> - private boolean done=false;
> -
> - protected TokenOrderingFilter(TokenStream input, int windowSize) {
> - super(input);
> - this.windowSize = windowSize;
> - }
> -
> - public Token next() throws IOException {
> - while (!done && queue.size() < windowSize) {
> - Token newTok = input.next();
> - if (newTok==null) {
> - done=true;
> - break;
> - }
> -
> - // reverse iterating for better efficiency since we know the
> - // list is already sorted, and most token start offsets will
> be too.
> - ListIterator<Token> iter = queue.listIterator(queue.size());
> - while(iter.hasPrevious()) {
> - if (newTok.startOffset() >= iter.previous().startOffset()) {
> - // insertion will be before what next() would return (what
> - // we just compared against), so move back one so the
> insertion
> - // will be after.
> - iter.next();
> - break;
> - }
> - }
> - iter.add(newTok);
> - }
> -
> - return queue.isEmpty() ? null : queue.removeFirst();
> - }
> -}
>