You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/01/11 08:01:24 UTC
[48/50] [abbrv] lucene-solr:jira/solr-11702: LUCENE-8121: UH switch
to SpanCollector API. Better accuracy. * Use the filtered freq in position
sensitive terms (better scores) * Refactored UH's OffsetsEnum * Improved test
randomization in TestUnifiedHighlighter [...]
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/352ec01a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
index acc4bd7..08820aa 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterStrictPhrases.java
@@ -17,6 +17,7 @@
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
@@ -38,6 +39,7 @@ import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
@@ -46,6 +48,7 @@ import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
@@ -55,6 +58,7 @@ import org.apache.lucene.util.QueryBuilder;
import org.junit.After;
import org.junit.Before;
+//TODO rename to reflect position sensitivity
public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
final FieldType fieldType;
@@ -151,6 +155,16 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
String[] snippets = highlighter.highlight("body", query, topDocs);
assertArrayEquals(new String[]{"<b>Yin</b> <b>yang</b>, <b>yin</b> gap yang"}, snippets);
+
+ // test the Passage only has 3 matches. We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.
+ highlighter.setFormatter(new PassageFormatter() {
+ @Override
+ public Object format(Passage[] passages, String content) {
+ return Arrays.toString(passages);
+ }
+ });
+ assertArrayEquals(new String[]{"[Passage[0-22]{yin[0-3],yang[4-8],yin[10-13]}score=2.4964213]"},
+ highlighter.highlight("body", query, topDocs));
}
public void testPhraseNotInDoc() throws IOException {
@@ -185,6 +199,16 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
String[] snippets = highlighter.highlight("body", query, topDocs);
assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"}, snippets);
+
+ // test the Passage only has 3 matches. We don't want duplicates from both PhraseQuery clauses.
+ highlighter.setFormatter(new PassageFormatter() {
+ @Override
+ public Object format(Passage[] passages, String content) {
+ return Arrays.toString(passages);
+ }
+ });
+ assertArrayEquals(new String[]{"[Passage[0-41]{alpha[0-5],bravo[6-11],charlie[12-19]}score=3.931102]"},
+ highlighter.highlight("body", query, topDocs));
}
public void testSynonyms() throws IOException {
@@ -477,4 +501,68 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
return wrapped.hashCode();
}
}
+
+ // Ported from LUCENE-5455 (fixed in LUCENE-8121). Also see LUCENE-2287.
+ public void testNestedSpanQueryHighlight() throws Exception {
+ // Checks that highlighting a nested SpanQuery marks only the term occurrences the query actually matched.
+ // For a long time, the highlighters assumed all query terms within the SpanQuery were valid at the Spans'
+ // position range, which highlighted unmatched occurrences. Now, using the SpanCollector API, we avoid this mistake.
+ final String FIELD_NAME = "body";
+ final String indexedText = "x y z x z x a"; // word order in the text: x=0,3,5  y=1  z=2,4  a=6
+ indexWriter.addDocument(newDoc(indexedText));
+ initReaderSearcherHighlighter();
+ TopDocs topDocs = new TopDocs(1, new ScoreDoc[]{new ScoreDoc(0, 1f)}, 1f); // hand-built hit list naming doc 0; no search is executed here
+
+ String expected = "<b>x</b> <b>y</b> <b>z</b> x z x <b>a</b>"; // case 1: only the leading x-y-z run and the final a are marked
+ Query q = new SpanNearQuery(new SpanQuery[] {
+ new SpanNearQuery(new SpanQuery[] {
+ new SpanTermQuery(new Term(FIELD_NAME, "x")),
+ new SpanTermQuery(new Term(FIELD_NAME, "y")),
+ new SpanTermQuery(new Term(FIELD_NAME, "z"))}, 0, true),
+ new SpanTermQuery(new Term(FIELD_NAME, "a"))}, 10, false);
+ String observed = highlighter.highlight(FIELD_NAME, q, topDocs)[0];
+ if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed); // debug aid only. NOTE(review): the escaped quote opened before each value is never closed
+ assertEquals("Nested SpanNear query not properly highlighted.", expected, observed);
+
+ expected = "x <b>y</b> <b>z</b> <b>x</b> <b>z</b> x <b>a</b>"; // case 2: the y-z and x-z pairs plus a are marked; the first and last x stay plain
+ q = new SpanNearQuery(new SpanQuery[] {
+ new SpanOrQuery(
+ new SpanNearQuery(new SpanQuery[] {
+ new SpanTermQuery(new Term(FIELD_NAME, "x")),
+ new SpanTermQuery(new Term(FIELD_NAME, "z"))}, 0, true),
+ new SpanNearQuery(new SpanQuery[] {
+ new SpanTermQuery(new Term(FIELD_NAME, "y")),
+ new SpanTermQuery(new Term(FIELD_NAME, "z"))}, 0, true)),
+ new SpanOrQuery(
+ new SpanTermQuery(new Term(FIELD_NAME, "a")),
+ new SpanTermQuery(new Term(FIELD_NAME, "b")))}, 10, false);
+ observed = highlighter.highlight(FIELD_NAME, q, topDocs)[0];
+ if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
+ assertEquals("Nested SpanNear query within SpanOr not properly highlighted.", expected, observed);
+
+ expected = "x <b>y</b> <b>z</b> <b>x</b> <b>z</b> x <b>a</b>"; // case 3: same expectation as case 2, with a wildcard clause in place of the x/y alternatives
+ q = new SpanNearQuery(new SpanQuery[] {
+ new SpanNearQuery(new SpanQuery[] {
+ new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(FIELD_NAME, "*"))),
+ new SpanTermQuery(new Term(FIELD_NAME, "z"))}, 0, true),
+ new SpanTermQuery(new Term(FIELD_NAME, "a"))}, 10, false);
+ observed = highlighter.highlight(FIELD_NAME, q, topDocs)[0];
+ if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
+ assertEquals("Nested SpanNear query with wildcard not properly highlighted.", expected, observed);
+
+ expected = "<b>x</b> <b>y</b> z x z x <b>a</b>"; // case 4: only the x-y pair and a are marked; every z stays plain
+ q = new SpanNearQuery(new SpanQuery[] {
+ new SpanOrQuery(
+ new SpanNearQuery(new SpanQuery[] {
+ new SpanTermQuery(new Term(FIELD_NAME, "x")),
+ new SpanTermQuery(new Term(FIELD_NAME, "y"))}, 0, true),
+ new SpanNearQuery(new SpanQuery[] { //No hit span query: the text has no z immediately followed by the term a
+ new SpanTermQuery(new Term(FIELD_NAME, "z")),
+ new SpanTermQuery(new Term(FIELD_NAME, "a"))}, 0, true)),
+ new SpanTermQuery(new Term(FIELD_NAME, "a"))}, 10, false);
+ observed = highlighter.highlight(FIELD_NAME, q, topDocs)[0];
+ if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
+ assertEquals("Nested SpanNear query within SpanOr not properly highlighted.", expected, observed);
+ }
+
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/352ec01a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
index 738a0b9..e60b17b 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
@@ -218,11 +218,9 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
// this code never runs; just for compilation
Passage p;
- try (OffsetsEnum oe = new OffsetsEnum(null, EMPTY)) {
+ try (OffsetsEnum oe = new OffsetsEnum.OfPostings(null, EMPTY)) {
oe.getTerm();
- oe.getPostingsEnum();
oe.freq();
- oe.hasMorePositions();
oe.nextPosition();
oe.startOffset();
oe.endOffset();