You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ma...@apache.org on 2009/04/10 04:09:47 UTC
svn commit: r763856 - in /lucene/java/trunk/contrib/highlighter/src:
java/org/apache/lucene/search/highlight/
test/org/apache/lucene/search/highlight/
Author: markrmiller
Date: Fri Apr 10 02:09:46 2009
New Revision: 763856
URL: http://svn.apache.org/viewvc?rev=763856&view=rev
Log:
Add ConstantScore highlighting support to SpanScorer
Modified:
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/spanscorer.html
lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java?rev=763856&r1=763855&r2=763856&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SpanScorer.java Fri Apr 10 02:09:46 2009
@@ -9,6 +9,7 @@
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.Query;
@@ -38,7 +39,25 @@
*/
public SpanScorer(Query query, String field,
CachingTokenFilter cachingTokenFilter) throws IOException {
- init(query, field, cachingTokenFilter, null);
+ init(query, field, cachingTokenFilter, null, false);
+ }
+
+
+ /**
+ * @param query
+ * Query to use for highlighting
+ * @param field
+ * Field to highlight - pass null to ignore fields
+ * @param cachingTokenFilter
+ * token stream of the source text to be highlighted
+ * @param expandMultiTermQuery
+ * rewrite multi-term queries against a single doc memory index to
+ * create boolean queries
+ * @throws IOException
+ */
+ public SpanScorer(Query query, String field,
+ CachingTokenFilter cachingTokenFilter, boolean expandMultiTermQuery) throws IOException {
+ init(query, field, cachingTokenFilter, null, expandMultiTermQuery);
}
/**
@@ -54,7 +73,26 @@
public SpanScorer(Query query, String field,
CachingTokenFilter cachingTokenFilter, IndexReader reader)
throws IOException {
- init(query, field, cachingTokenFilter, reader);
+ init(query, field, cachingTokenFilter, reader, false);
+ }
+
+ /**
+ * @param query
+ * Query to use for highlighting
+ * @param field
+ * Field to highlight - pass null to ignore fields
+ * @param cachingTokenFilter
+ * token stream of the source text to be highlighted
+ * @param reader
+ * @param expandMultiTermQuery
+ * rewrite multi-term queries against a single doc memory index to
+ * create boolean queries
+ * @throws IOException
+ */
+ public SpanScorer(Query query, String field,
+ CachingTokenFilter cachingTokenFilter, IndexReader reader, boolean expandMultiTermQuery)
+ throws IOException {
+ init(query, field, cachingTokenFilter, reader, expandMultiTermQuery);
}
/**
@@ -64,7 +102,17 @@
CachingTokenFilter cachingTokenFilter, IndexReader reader, String defaultField)
throws IOException {
this.defaultField = defaultField.intern();
- init(query, field, cachingTokenFilter, reader);
+ init(query, field, cachingTokenFilter, reader, false);
+ }
+
+ /**
+ * As above, but with ability to pass in an <tt>IndexReader</tt> and to choose,
+ * via <tt>expandMultiTermQuery</tt>, whether multi-term queries are expanded
+ */
+ public SpanScorer(Query query, String field,
+ CachingTokenFilter cachingTokenFilter, IndexReader reader, String defaultField, boolean expandMultiTermQuery)
+ throws IOException {
+ this.defaultField = defaultField.intern();
+ init(query, field, cachingTokenFilter, reader, expandMultiTermQuery);
}
/**
@@ -73,7 +121,16 @@
public SpanScorer(Query query, String field,
CachingTokenFilter cachingTokenFilter, String defaultField) throws IOException {
this.defaultField = defaultField.intern();
- init(query, field, cachingTokenFilter, null);
+ init(query, field, cachingTokenFilter, null, false);
+ }
+
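+ /**
+ * @param defaultField - The default field for queries with the field name unspecified
+ * @param expandMultiTermQuery - rewrite multi-term queries against a single doc
+ * memory index to create boolean queries
+ */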
+ public SpanScorer(Query query, String field,
+ CachingTokenFilter cachingTokenFilter, String defaultField, boolean expandMultiTermQuery) throws IOException {
+ this.defaultField = defaultField.intern();
+ init(query, field, cachingTokenFilter, null, expandMultiTermQuery);
}
/**
@@ -165,13 +222,13 @@
* @throws IOException
*/
private void init(Query query, String field,
- CachingTokenFilter cachingTokenFilter, IndexReader reader)
+ CachingTokenFilter cachingTokenFilter, IndexReader reader, boolean expandMultiTermQuery)
throws IOException {
WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
: new WeightedSpanTermExtractor(defaultField);
qse.setHighlightCnstScrRngQuery(highlightCnstScrRngQuery);
-
+ qse.setExpandMultiTermQuery(expandMultiTermQuery);
if (reader == null) {
this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
cachingTokenFilter, field);
@@ -183,6 +240,8 @@
/**
* @return whether ConstantScoreRangeQuerys are set to be highlighted
+ * @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
+ * constructor option to expand MultiTerm queries.
*/
public static boolean isHighlightCnstScrRngQuery() {
return highlightCnstScrRngQuery;
@@ -197,10 +256,13 @@
}
/**
- * Turns highlighting of ConstantScoreRangeQuery on/off. ConstantScoreRangeQuerys cannot be
- * highlighted if you rewrite the query first. Must be called before SpanScorer construction.
+ * Turns highlighting of ConstantScoreRangeQuery on/off.
+ * ConstantScoreRangeQuerys cannot be highlighted if you rewrite the query
+ * first. Must be called before SpanScorer construction.
*
* @param highlightCnstScrRngQuery
+ * @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
+ * constructor option to expand MultiTerm queries.
*/
public static void setHighlightCnstScrRngQuery(boolean highlight) {
highlightCnstScrRngQuery = highlight;
Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=763856&r1=763855&r2=763856&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Fri Apr 10 02:09:46 2009
@@ -38,11 +38,16 @@
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FilteredQuery;
+import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
@@ -59,6 +64,7 @@
private Map readers = new HashMap(10); // Map<String, IndexReader>
private String defaultField;
private boolean highlightCnstScrRngQuery;
+ private boolean expandMultiTermQuery;
public WeightedSpanTermExtractor() {
}
@@ -131,6 +137,14 @@
extract((Query) iterator.next(), disjunctTerms);
}
terms.putAll(disjunctTerms);
+ } else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) {
+ MultiTermQuery mtq = ((MultiTermQuery)query);
+ if(mtq.getConstantScoreRewrite()) {
+ query = copyMultiTermQuery(mtq);
+ mtq.setConstantScoreRewrite(false);
+ }
+ IndexReader ir = getReaderForField(fieldName);
+ extract(query.rewrite(ir), terms);
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final List termArrays = mpq.getTermArrays();
@@ -179,27 +193,7 @@
sp.setBoost(query.getBoost());
extractWeightedSpanTerms(terms, sp);
}
- } else if (highlightCnstScrRngQuery && query instanceof ConstantScoreRangeQuery) {
- ConstantScoreRangeQuery q = (ConstantScoreRangeQuery) query;
- Term lower = new Term(fieldName, q.getLowerVal());
- Term upper = new Term(fieldName, q.getUpperVal());
- FilterIndexReader fir = new FilterIndexReader(getReaderForField(fieldName));
- try {
- TermEnum te = fir.terms(lower);
- BooleanQuery bq = new BooleanQuery();
- do {
- Term term = te.term();
- if (term != null && upper.compareTo(term) >= 0) {
- bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
- } else {
- break;
- }
- } while (te.next());
- extract(bq, terms);
- } finally {
- fir.close();
- }
- }
+ }
}
/**
@@ -425,10 +419,19 @@
return terms;
}
+ /**
+ * @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use
+ * getExpandMultiTermQuery instead.
+ */
public boolean isHighlightCnstScrRngQuery() {
return highlightCnstScrRngQuery;
}
-
+
+ /**
+ * @param highlightCnstScrRngQuery
+ * @deprecated {@link ConstantScoreRangeQuery} is deprecated. Use the
+ * setExpandMultiTermQuery option.
+ */
public void setHighlightCnstScrRngQuery(boolean highlightCnstScrRngQuery) {
this.highlightCnstScrRngQuery = highlightCnstScrRngQuery;
}
@@ -460,4 +463,35 @@
}
}
+
+ private Query copyMultiTermQuery(MultiTermQuery query) {
+ if(query instanceof RangeQuery) {
+ RangeQuery q = (RangeQuery)query;
+ q.setBoost(query.getBoost());
+ return new RangeQuery(q.getField(), q.getLowerTermText(), q.getUpperTermText(), q.includesLower(), q.includesUpper());
+ } else if(query instanceof WildcardQuery) {
+ Query q = new WildcardQuery(query.getTerm());
+ q.setBoost(query.getBoost());
+ return q;
+ } else if(query instanceof PrefixQuery) {
+ Query q = new PrefixQuery(query.getTerm());
+ q.setBoost(q.getBoost());
+ return q;
+ } else if(query instanceof FuzzyQuery) {
+ FuzzyQuery q = (FuzzyQuery)query;
+ q.setBoost(q.getBoost());
+ return new FuzzyQuery(q.getTerm(), q.getMinSimilarity(), q.getPrefixLength());
+ }
+
+ return query;
+ }
+
+
+ public boolean getExpandMultiTermQuery() {
+ return expandMultiTermQuery;
+ }
+
+ public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
+ this.expandMultiTermQuery = expandMultiTermQuery;
+ }
}
Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/spanscorer.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/spanscorer.html?rev=763856&r1=763855&r2=763856&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/spanscorer.html (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/spanscorer.html Fri Apr 10 02:09:46 2009
@@ -16,12 +16,19 @@
matching Spans are recorded with the respective WeightedSpanTerms and these positions are
then used to filter possible Token matches during scoring.
</p>
+<p>
+Unlike with the QueryScorer, you do not want to rewrite the query first when using the
+SpanScorer to handle multi-term queries, i.e. wildcard, fuzzy, and range queries.
+The SpanScorer constructors provide an option to enable the highlighting of multi-term queries.
+If this option is enabled, the SpanScorer will rewrite the query against a single doc index
+containing the doc to be highlighted, rather than against the full index. If you do rewrite the
+query first, certain multi-term queries may not highlight correctly.
+</p>
<h2>Example Usage</h2>
<pre>
IndexSearcher searcher = new IndexSearcher(ramDir);
Query query = QueryParser.parse("Kenne*", FIELD_NAME, analyzer);
- query = query.rewrite(reader); //required to expand search terms
Hits hits = searcher.search(query);
for (int i = 0; i < hits.length(); i++)
@@ -29,7 +36,7 @@
String text = hits.doc(i).get(FIELD_NAME);
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(
FIELD_NAME, new StringReader(text)));
- Highlighter highlighter = new Highlighter(new SpanScorer(query, FIELD_NAME, tokenStream));
+ Highlighter highlighter = new Highlighter(new SpanScorer(query, FIELD_NAME, tokenStream, true));
tokenStream.reset();
// Get 3 best fragments and seperate with a "..."
Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=763856&r1=763855&r2=763856&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Fri Apr 10 02:09:46 2009
@@ -63,6 +63,7 @@
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
import org.apache.lucene.search.spans.SpanNearQuery;
@@ -437,7 +438,7 @@
public void run() throws Exception {
numHighlights = 0;
doSearching("Kinnedy~");
- doStandardHighlights(analyzer, hits, query, HighlighterTest.this);
+ doStandardHighlights(analyzer, hits, query, HighlighterTest.this, true);
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 5);
}
@@ -539,6 +540,45 @@
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 5);
}
+
+ public void testConstantScoreMultiTermQuery() throws Exception {
+
+ numHighlights = 0;
+
+ query = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
+ ((WildcardQuery)query).setConstantScoreRewrite(true);
+ searcher = new IndexSearcher(ramDir);
+ // can't rewrite ConstantScore if you want to highlight it -
+ // it rewrites to ConstantScoreQuery which cannot be highlighted
+ // query = unReWrittenQuery.rewrite(reader);
+ System.out.println("Searching for: " + query.toString(FIELD_NAME));
+ hits = searcher.search(query);
+
+ for (int i = 0; i < hits.length(); i++) {
+ String text = hits.doc(i).get(HighlighterTest.FIELD_NAME);
+ int maxNumFragmentsRequired = 2;
+ String fragmentSeparator = "...";
+ SpanScorer scorer = null;
+ TokenStream tokenStream = null;
+
+ tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
+ new StringReader(text)));
+
+ scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME, (CachingTokenFilter) tokenStream, true);
+
+ Highlighter highlighter = new Highlighter(this, scorer);
+
+ ((CachingTokenFilter) tokenStream).reset();
+
+ highlighter.setTextFragmenter(new SimpleFragmenter(20));
+
+ String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
+ fragmentSeparator);
+ System.out.println("\t" + result);
+ }
+ assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+ numHighlights == 5);
+ }
public void testGetBestFragmentsPhrase() throws Exception {
TestHighlightRunner helper = new TestHighlightRunner() {
@@ -1565,6 +1605,11 @@
}
void doStandardHighlights(Analyzer analyzer, Hits hits, Query query, Formatter formatter)
+ throws Exception {
+ doStandardHighlights(analyzer, hits, query, formatter, false);
+ }
+
+ void doStandardHighlights(Analyzer analyzer, Hits hits, Query query, Formatter formatter, boolean expandMT)
throws Exception {
for (int i = 0; i < hits.length(); i++) {
@@ -1577,7 +1622,7 @@
tokenStream = new CachingTokenFilter(analyzer.tokenStream(HighlighterTest.FIELD_NAME,
new StringReader(text)));
scorer = new SpanScorer(query, HighlighterTest.FIELD_NAME,
- (CachingTokenFilter) tokenStream);
+ (CachingTokenFilter) tokenStream, expandMT);
} else if (mode == STANDARD) {
scorer = new QueryScorer(query);
tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));