You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2016/11/08 21:13:39 UTC
lucene-solr:master: LUCENE-7544: UnifiedHighlighter: extension hooks
for custom query handling
Repository: lucene-solr
Updated Branches:
refs/heads/master b02e7a902 -> da841be88
LUCENE-7544: UnifiedHighlighter: extension hooks for custom query handling
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/da841be8
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/da841be8
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/da841be8
Branch: refs/heads/master
Commit: da841be88774e7a00b4dd954399c7e997bf15d1a
Parents: b02e7a9
Author: David Smiley <ds...@apache.org>
Authored: Tue Nov 8 16:13:32 2016 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Tue Nov 8 16:13:32 2016 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../uhighlight/AnalysisOffsetStrategy.java | 15 +++-
.../uhighlight/MultiTermHighlighting.java | 32 ++++---
.../lucene/search/uhighlight/PhraseHelper.java | 19 ++++-
.../search/uhighlight/UnifiedHighlighter.java | 35 +++++++-
.../uhighlight/TestUnifiedHighlighterMTQ.java | 88 ++++++++++++++++++++
.../TestUnifiedHighlighterStrictPhrases.java | 77 +++++++++++++++++
7 files changed, 250 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da841be8/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0ccb5ee..f64e9e8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -67,6 +67,9 @@ Improvements
and exclude arguments of SpanNotQuery via negative pre and/or post arguments.
(Marc Morissette via David Smiley)
+* LUCENE-7544: UnifiedHighlighter: add extension points for handling custom queries.
+ (Michael Braun, David Smiley)
+
======================= Lucene 6.3.0 =======================
API Changes
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da841be8/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
index 553a636..6b4cc74 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java
@@ -19,8 +19,10 @@ package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.List;
+import java.util.function.Function;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.FilteringTokenFilter;
@@ -30,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
@@ -50,7 +53,9 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
private final LeafReader leafReader;
private final CharacterRunAutomaton preMemIndexFilterAutomaton;
- public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Analyzer analyzer) {
+ public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
+ CharacterRunAutomaton[] automata, Analyzer analyzer,
+ Function<Query, Collection<Query>> multiTermQueryRewrite) {
super(field, extractedTerms, phraseHelper, automata);
this.analyzer = analyzer;
// Automata (Wildcards / MultiTermQuery):
@@ -68,7 +73,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
// preFilter for MemoryIndex
- preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases);
+ preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases,
+ multiTermQueryRewrite);
} else {
memoryIndex = null;
leafReader = null;
@@ -155,7 +161,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
*/
private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
CharacterRunAutomaton[] automata,
- PhraseHelper strictPhrases) {
+ PhraseHelper strictPhrases,
+ Function<Query, Collection<Query>> multiTermQueryRewrite) {
List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
if (terms.length > 0) {
allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
@@ -163,7 +170,7 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
Collections.addAll(allAutomata, automata);
for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
Collections.addAll(allAutomata,
- MultiTermHighlighting.extractAutomata(spanQuery, field, true));//true==lookInSpan
+ MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
}
if (allAutomata.size() == 1) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da841be8/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
index 9498af5..e85fa3b 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
@@ -20,8 +20,10 @@ import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Comparator;
import java.util.List;
+import java.util.function.Function;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -69,34 +71,44 @@ class MultiTermHighlighting {
* Extracts all MultiTermQueries for {@code field}, and returns equivalent
* automata that will match terms.
*/
- public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan) {
+ public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
+ Function<Query, Collection<Query>> preRewriteFunc) {
List<CharacterRunAutomaton> list = new ArrayList<>();
- if (query instanceof BooleanQuery) {
+ Collection<Query> customSubQueries = preRewriteFunc.apply(query);
+ if (customSubQueries != null) {
+ for (Query sub : customSubQueries) {
+ list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
+ }
+ } else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
- list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan)));
+ list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc)));
}
}
} else if (query instanceof ConstantScoreQuery) {
- list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan)));
+ list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
+ preRewriteFunc)));
} else if (query instanceof DisjunctionMaxQuery) {
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
- list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
+ list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanOrQuery) {
for (Query sub : ((SpanOrQuery) query).getClauses()) {
- list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
+ list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNearQuery) {
for (Query sub : ((SpanNearQuery) query).getClauses()) {
- list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
+ list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNotQuery) {
- list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan)));
+ list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
+ preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
- list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan)));
+ list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
+ preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
- list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field, lookInSpan)));
+ list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
+ lookInSpan, preRewriteFunc)));
} else if (query instanceof AutomatonQuery) {
final AutomatonQuery aq = (AutomatonQuery) query;
if (aq.getField().equals(field)) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da841be8/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
index 5225041..95d51c9 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
@@ -40,7 +40,7 @@ import java.util.function.Function;
public class PhraseHelper {
public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
- spanQuery -> null, true);
+ spanQuery -> null, query -> null, true);
//TODO it seems this ought to be a general thing on Spans?
private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
@@ -69,11 +69,14 @@ public class PhraseHelper {
* {@code rewriteQueryPred} is an extension hook to override the default choice of
* {@link WeightedSpanTermExtractor#mustRewriteQuery(SpanQuery)}. By default unknown query types are rewritten,
* so use this to return {@link Boolean#FALSE} if you know the query doesn't need to be rewritten.
+ * Similarly, {@code preExtractRewriteFunction} is also an extension hook for extract to allow different queries
+ * to be set before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
* {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
* usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
*/
public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
- boolean ignoreQueriesNeedingRewrite) {
+ Function<Query, Collection<Query>> preExtractRewriteFunction,
+ boolean ignoreQueriesNeedingRewrite) {
this.fieldName = field; // if null then don't require field match
// filter terms to those we want
positionInsensitiveTerms = field != null ? new FieldFilteringTermHashSet(field) : new HashSet<>();
@@ -99,6 +102,18 @@ public class PhraseHelper {
}
@Override
+ protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
+ Collection<Query> newQueriesToExtract = preExtractRewriteFunction.apply(query);
+ if (newQueriesToExtract != null) {
+ for (Query newQuery : newQueriesToExtract) {
+ extract(newQuery, boost, terms);
+ }
+ } else {
+ super.extract(query, boost, terms);
+ }
+ }
+
+ @Override
protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
if (clazz.isAssignableFrom(MultiTermQuery.class)) {
return true; //We do MTQ processing separately in MultiTermHighlighting.java
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da841be8/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index 72be180..5f09d84 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
@@ -732,7 +733,8 @@ public class UnifiedHighlighter {
OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
switch (offsetSource) {
case ANALYSIS:
- return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
+ return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
+ this::preMultiTermQueryRewrite);
case NONE_NEEDED:
return NoOpOffsetStrategy.INSTANCE;
case TERM_VECTORS:
@@ -776,13 +778,14 @@ public class UnifiedHighlighter {
boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
return highlightPhrasesStrictly ?
- new PhraseHelper(query, field, this::requiresRewrite, !handleMultiTermQuery) :
+ new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
PhraseHelper.NONE;
}
protected CharacterRunAutomaton[] getAutomata(String field, Query query, EnumSet<HighlightFlag> highlightFlags) {
return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
- ? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES))
+ ? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
+ this::preMultiTermQueryRewrite)
: ZERO_LEN_AUTOMATA_ARRAY;
}
@@ -830,6 +833,32 @@ public class UnifiedHighlighter {
return null;
}
+ /**
+ * When highlighting phrases accurately, we may need to handle custom queries that aren't supported in the
+ * {@link org.apache.lucene.search.highlight.WeightedSpanTermExtractor} as called by the {@code PhraseHelper}.
+ * Should custom query types be needed, this method should be overridden to return a collection of queries if appropriate,
+ * or null if nothing to do. If the query is not custom, simply returning null will allow the default rules to apply.
+ *
+ * @param query Query to be highlighted
+ * @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null.
+ */
+ protected Collection<Query> preSpanQueryRewrite(Query query) {
+ return null;
+ }
+
+ /**
+ * When dealing with multi term queries / span queries, we may need to handle custom queries that aren't supported
+ * by the default automata extraction in {@code MultiTermHighlighting}. This can be overridden to return a collection
+ * of queries if appropriate, or null if nothing to do. If query is not custom, simply returning null will allow the
+ * default rules to apply.
+ *
+ * @param query Query to be highlighted
+ * @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null.
+ */
+ protected Collection<Query> preMultiTermQueryRewrite(Query query) {
+ return null;
+ }
+
private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) {
return new DocIdSetIterator() {
int idx = -1;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da841be8/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
index 63f0bb1..ddc9507 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -20,6 +20,8 @@ package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.analysis.Analyzer;
@@ -56,6 +58,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@@ -933,4 +936,89 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
ir.close();
}
+ public void testCustomSpanQueryHighlighting() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+ Document doc = new Document();
+ doc.add(new Field("body", "alpha bravo charlie delta echo foxtrot golf hotel india juliet", fieldType));
+ doc.add(newTextField("id", "id", Field.Store.YES));
+
+ iw.addDocument(doc);
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+ @Override
+ protected List<Query> preMultiTermQueryRewrite(Query query) {
+ if (query instanceof MyWrapperSpanQuery) {
+ return Collections.singletonList(((MyWrapperSpanQuery) query).originalQuery);
+ }
+ return null;
+ }
+ };
+
+ int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
+
+ WildcardQuery wildcardQuery = new WildcardQuery(new Term("body", "foxtr*"));
+ SpanMultiTermQueryWrapper wildcardQueryWrapper = new SpanMultiTermQueryWrapper<>(wildcardQuery);
+
+ SpanQuery wrappedQuery = new MyWrapperSpanQuery(wildcardQueryWrapper);
+
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(wrappedQuery, BooleanClause.Occur.SHOULD)
+ .build();
+
+ int[] docIds = new int[]{docId};
+
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
+ assertEquals(1, snippets.length);
+ assertEquals("alpha bravo charlie delta echo <b>foxtrot</b> golf hotel india juliet", snippets[0]);
+ ir.close();
+ }
+
+ private static class MyWrapperSpanQuery extends SpanQuery {
+
+ private final SpanQuery originalQuery;
+
+ private MyWrapperSpanQuery(SpanQuery originalQuery) {
+ this.originalQuery = Objects.requireNonNull(originalQuery);
+ }
+
+ @Override
+ public String getField() {
+ return originalQuery.getField();
+ }
+
+ @Override
+ public String toString(String field) {
+ return "(Wrapper[" + originalQuery.toString(field)+"])";
+ }
+
+ @Override
+ public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+ return originalQuery.createWeight(searcher, needsScores, boost);
+ }
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ Query newOriginalQuery = originalQuery.rewrite(reader);
+ if (newOriginalQuery != originalQuery) {
+ return new MyWrapperSpanQuery((SpanQuery)newOriginalQuery);
+ }
+ return this;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ return originalQuery.equals(((MyWrapperSpanQuery)o).originalQuery);
+ }
+
+ @Override
+ public int hashCode() {
+ return originalQuery.hashCode();
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da841be8/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
index 5fecdc6..dafb6e2 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
@@ -17,6 +17,8 @@
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -29,14 +31,17 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
@@ -401,4 +406,76 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
assertEquals(content, o);
}
+
+ public void testPreSpanQueryRewrite() throws IOException {
+ indexWriter.addDocument(newDoc("There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
+ initReaderSearcherHighlighter();
+
+ highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+ @Override
+ protected Collection<Query> preSpanQueryRewrite(Query query) {
+ if (query instanceof MyQuery) {
+ return Collections.singletonList(((MyQuery)query).wrapped);
+ }
+ return null;
+ }
+ };
+ highlighter.setHighlightPhrasesStrictly(true);
+
+ BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ Query phraseQuery = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);
+ Query oredTerms = new BooleanQuery.Builder()
+ .setMinimumNumberShouldMatch(2)
+ .add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD)
+ .build();
+ Query proximityBoostingQuery = new MyQuery(oredTerms);
+ Query totalQuery = bqBuilder
+ .add(phraseQuery, BooleanClause.Occur.SHOULD)
+ .add(proximityBoostingQuery, BooleanClause.Occur.SHOULD)
+ .build();
+ TopDocs topDocs = searcher.search(totalQuery, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ String[] snippets = highlighter.highlight("body", totalQuery, topDocs);
+ assertArrayEquals(new String[]{"There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary."}, snippets);
+ }
+
+ private static class MyQuery extends Query {
+
+ private final Query wrapped;
+
+ MyQuery(Query wrapped) {
+ this.wrapped = wrapped;
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+ return wrapped.createWeight(searcher, needsScores, boost);
+ }
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ Query newWrapped = wrapped.rewrite(reader);
+ if (newWrapped != wrapped) {
+ return new MyQuery(newWrapped);
+ }
+ return this;
+ }
+
+ @Override
+ public String toString(String field) {
+ return "[[["+wrapped.toString(field)+"]]]";
+ }
+
+ /**
+ * Two MyQuery instances are equal when they are of the same class and wrap equal queries.
+ *
+ * @param obj the object to compare against; may be null
+ * @return true if {@code obj} is a MyQuery whose wrapped query equals this one's
+ */
+ @Override
+ public boolean equals(Object obj) {
+ // Cast the argument, not our own field: casting 'wrapped' to MyQuery throws ClassCastException
+ // whenever the wrapped query is not itself a MyQuery, and never compares against obj's query.
+ return obj != null && obj.getClass() == getClass() && wrapped.equals(((MyQuery) obj).wrapped);
+ }
+
+ @Override
+ public int hashCode() {
+ return wrapped.hashCode();
+ }
+ }
}