You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2021/10/23 00:40:27 UTC
[lucene] branch main updated: LUCENE-9431: UnifiedHighlighter
WEIGHT_MATCHES is now true by default (#362)
This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 2719cf6 LUCENE-9431: UnifiedHighlighter WEIGHT_MATCHES is now true by default (#362)
2719cf6 is described below
commit 2719cf6630eb2bd7cb37d0e8462dc912d8fafd83
Author: David Smiley <ds...@apache.org>
AuthorDate: Fri Oct 22 20:40:22 2021 -0400
LUCENE-9431: UnifiedHighlighter WEIGHT_MATCHES is now true by default (#362)
Co-authored-by: Animesh Pandey <ap...@gmail.com>
---
lucene/CHANGES.txt | 3 ++
.../search/uhighlight/UnifiedHighlighter.java | 16 ++++--
.../search/uhighlight/TestUnifiedHighlighter.java | 19 +++++++
.../uhighlight/TestUnifiedHighlighterMTQ.java | 9 +++-
.../TestUnifiedHighlighterTermIntervals.java | 58 ++++------------------
5 files changed, 51 insertions(+), 54 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index b2ac0cb..3cf94ac 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -160,6 +160,9 @@ API Changes
* LUCENE-9325: Sort is now final, and the `setSort()` method has been removed (Alan Woodward)
+* LUCENE-9431: The UnifiedHighlighter's WEIGHT_MATCHES flag is now set by default, provided its
+ requirements are met. It can be disabled via over-riding getFlags (Animesh Pandey, David Smiley)
+
* LUCENE-10158: Add a new interface Unwrappable to the utils package to allow code to
unwrap wrappers/delegators that are added by Lucene's testing framework. This will allow
testing new MMapDirectory implementation based on JDK Project Panama. (Uwe Schindler)
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index fb9b9ba..da3b809 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -86,7 +86,7 @@ import org.apache.lucene.util.InPlaceMergeSorter;
* <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
* </ul>
*
- * <p>This is thread-safe.
+ * <p>This is thread-safe, notwithstanding the setters.
*
* @lucene.experimental
*/
@@ -823,6 +823,7 @@ public class UnifiedHighlighter {
return filteredTerms.toArray(new BytesRef[filteredTerms.size()]);
}
+ /** Customize the highlighting flags to use by field. */
protected Set<HighlightFlag> getFlags(String field) {
Set<HighlightFlag> highlightFlags = EnumSet.noneOf(HighlightFlag.class);
if (shouldHandleMultiTermQuery(field)) {
@@ -834,6 +835,11 @@ public class UnifiedHighlighter {
if (shouldPreferPassageRelevancyOverSpeed(field)) {
highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
}
+ if (highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
+ && highlightFlags.contains(HighlightFlag.PHRASES)
+ && highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED)) {
+ highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);
+ }
return highlightFlags;
}
@@ -1168,9 +1174,11 @@ public class UnifiedHighlighter {
/**
* Internally use the {@link Weight#matches(LeafReaderContext, int)} API for highlighting. It's
- * more accurate to the query, though might not calculate passage relevancy as well. Use of this
- * flag requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES}. {@link
- * #PASSAGE_RELEVANCY_OVER_SPEED} will be ignored. False by default.
+ * more accurate to the query, and the snippets can be a little different for phrases because
+ * the whole phrase is marked up instead of each word. The passage relevancy calculation can be
+ * different (maybe worse?) and it's slower when highlighting many fields. Use of this flag
+ * requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES} and {@link
+ * #PASSAGE_RELEVANCY_OVER_SPEED}. True by default because those booleans are true by default.
*/
WEIGHT_MATCHES
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index 1f23c3f..12c2259 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -460,6 +460,25 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
ir.close();
}
+ public void testHighlighterDefaultFlags() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+ Document document = new Document();
+ document.add(new Field("body", "test body", fieldType));
+ iw.addDocument(document);
+ IndexReader ir = iw.getReader();
+ iw.close();
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Set<HighlightFlag> flags = highlighter.getFlags("body");
+ assertTrue(flags.contains(HighlightFlag.PHRASES));
+ assertTrue(flags.contains(HighlightFlag.MULTI_TERM_QUERY));
+ assertTrue(flags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED));
+ assertTrue(flags.contains(HighlightFlag.WEIGHT_MATCHES));
+ // if more flags are added, bump the number below and add an assertTrue or assertFalse above
+ assertEquals(4, HighlightFlag.values().length);
+ ir.close();
+ }
+
public void testCuriousGeorge() throws Exception {
String text =
"It’s the formula for success for preschoolers—Curious George and fire trucks! "
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
index 4ddf238..b48d304 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -778,10 +778,15 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(1, snippets.length);
+ // All flags are enabled.
assertEquals(
- "<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.",
+ "" + highlighter.getFlags("body"),
+ HighlightFlag.values().length,
+ highlighter.getFlags("body").size());
+ assertEquals(
+ "" + highlighter.getFlags("title"),
+ "<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(sentence)</b> document.",
snippets[0]);
-
ir.close();
}
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java
index 8e602b1..13588c3 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java
@@ -27,7 +27,6 @@ import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
@@ -87,47 +86,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
static UnifiedHighlighter randomUnifiedHighlighter(
IndexSearcher searcher, Analyzer indexAnalyzer) {
- return randomUnifiedHighlighter(
+ return TestUnifiedHighlighter.randomUnifiedHighlighter(
searcher, indexAnalyzer, EnumSet.noneOf(HighlightFlag.class), null);
}
- static UnifiedHighlighter randomUnifiedHighlighter(
- IndexSearcher searcher,
- Analyzer indexAnalyzer,
- EnumSet<HighlightFlag> mandatoryFlags,
- Boolean requireFieldMatch) {
- final UnifiedHighlighter uh =
- new UnifiedHighlighter(searcher, indexAnalyzer) {
- Set<HighlightFlag> flags; // consistently random set of flags for this test run
-
- @Override
- protected Set<HighlightFlag> getFlags(String field) {
- if (flags != null) {
- return flags;
- }
- final EnumSet<HighlightFlag> result = EnumSet.copyOf(mandatoryFlags);
- int r = random().nextInt();
- for (HighlightFlag highlightFlag : HighlightFlag.values()) {
- if (((1 << highlightFlag.ordinal()) & r) == 0) {
- result.add(highlightFlag);
- }
- }
- if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
- // these two are required for WEIGHT_MATCHES
- result.add(HighlightFlag.MULTI_TERM_QUERY);
- result.add(HighlightFlag.PHRASES);
- }
- return flags = result;
- }
- };
- uh.setCacheFieldValCharsThreshold(random().nextInt(100));
- if (requireFieldMatch == Boolean.FALSE
- || (requireFieldMatch == null && random().nextBoolean())) {
- uh.setFieldMatcher(f -> true); // requireFieldMatch==false
- }
- return uh;
- }
-
//
// Tests below were ported from the PostingsHighlighter. Possibly augmented. Far below are newer
// tests.
@@ -1033,15 +995,15 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
assertEquals(1, topDocs.totalHits.value);
String[] snippets = highlighter.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
- if (highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
- assertEquals(
- "" + highlighter.getFlags("title"), "<b>This is the title field</b>.", snippets[0]);
- } else {
- assertEquals(
- "" + highlighter.getFlags("title"),
- "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.",
- snippets[0]);
- }
+ // All flags are enabled.
+ assertEquals(
+ "" + highlighter.getFlags("title"),
+ HighlightFlag.values().length,
+ highlighter.getFlags("title").size());
+ assertEquals(
+ "" + highlighter.getFlags("title"),
+ "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.",
+ snippets[0]);
ir.close();
}