You are viewing a plain-text version of this content. (The hyperlink to the canonical version was not preserved in this plain-text copy.)
Posted to java-commits@lucene.apache.org by ma...@apache.org on 2009/07/29 18:47:06 UTC
svn commit: r798976 - in /lucene/java/trunk/contrib: CHANGES.txt
highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
Author: markrmiller
Date: Wed Jul 29 16:47:05 2009
New Revision: 798976
URL: http://svn.apache.org/viewvc?rev=798976&view=rev
Log:
LUCENE-1752: Missing highlights when terms were repeated in separate, nested, boolean or disjunction queries.
Modified:
lucene/java/trunk/contrib/CHANGES.txt
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=798976&r1=798975&r2=798976&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Wed Jul 29 16:47:05 2009
@@ -41,6 +41,9 @@
8. LUCENE-1491: EdgeNGramTokenFilter no longer stops on tokens shorter than minimum n-gram size.
(Todd Teak via Otis Gospodnetic)
+
+ 9. LUCENE-1752: Missing highlights when terms were repeated in separate, nested, boolean or
+ disjunction queries. (Koji Sekiguchi, Mark Miller)
New features
Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=798976&r1=798975&r2=798976&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Wed Jul 29 16:47:05 2009
@@ -98,13 +98,12 @@
private void extract(Query query, Map terms) throws IOException {
if (query instanceof BooleanQuery) {
BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
- Map booleanTerms = new PositionCheckingMap();
+
for (int i = 0; i < queryClauses.length; i++) {
if (!queryClauses[i].isProhibited()) {
- extract(queryClauses[i].getQuery(), booleanTerms);
+ extract(queryClauses[i].getQuery(), terms);
}
}
- terms.putAll(booleanTerms);
} else if (query instanceof PhraseQuery) {
Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
@@ -129,11 +128,9 @@
} else if (query instanceof FilteredQuery) {
extract(((FilteredQuery) query).getQuery(), terms);
} else if (query instanceof DisjunctionMaxQuery) {
- Map disjunctTerms = new PositionCheckingMap();
for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
- extract((Query) iterator.next(), disjunctTerms);
+ extract((Query) iterator.next(), terms);
}
- terms.putAll(disjunctTerms);
} else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) {
MultiTermQuery mtq = ((MultiTermQuery)query);
if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=798976&r1=798975&r2=798976&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Wed Jul 29 16:47:05 2009
@@ -53,13 +53,13 @@
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiSearcher;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeFilter;
@@ -178,6 +178,31 @@
// throw any exceptions
}
+ // LUCENE-1752
+ public void testRepeatingTermsInMultBooleans() throws Exception {
+ String content = "x y z a b c d e f g b c g";
+ String ph1 = "\"a b c d\"";
+ String ph2 = "\"b c g\"";
+ String f1 = "f1";
+ String f2 = "f2";
+ String f1c = f1 + ":";
+ String f2c = f2 + ":";
+ String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
+ + " OR " + f2c + ph2 + ")";
+ Analyzer analyzer = new WhitespaceAnalyzer();
+ QueryParser qp = new QueryParser(f1, analyzer);
+ Query query = qp.parse(q);
+ CachingTokenFilter stream = new CachingTokenFilter(analyzer.tokenStream(f1,
+ new StringReader(content)));
+ Scorer scorer = new SpanScorer(query, f1, stream, false);
+ Highlighter h = new Highlighter(this, scorer);
+
+ h.getBestFragment(analyzer, f1, content);
+
+ assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+ numHighlights == 7);
+ }
+
public void testSimpleSpanPhraseHighlighting() throws Exception {
doSearching("\"very long and contains\"");