You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2016/10/04 20:12:07 UTC
[2/6] lucene-solr:master: LUCENE-7438: New UnifiedHighlighter
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/722e8271/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestSplittingBreakIterator.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestSplittingBreakIterator.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestSplittingBreakIterator.java
new file mode 100644
index 0000000..b78f329
--- /dev/null
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestSplittingBreakIterator.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.uhighlight;
+
+import java.text.BreakIterator;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
+public class TestSplittingBreakIterator extends LuceneTestCase {
+
+
+ private static final BreakIterator LINE_BI = BreakIterator.getLineInstance(Locale.ROOT);
+ private static final BreakIterator SPLIT_BI = new SplittingBreakIterator(LINE_BI, '|');
+
+ public void testLineBreakIterator() {
+ testWithoutSplits(LINE_BI);
+ }
+
+ private void testWithoutSplits(BreakIterator bi) {
+ // these tests have no '|'
+ testBreakIterator(bi,
+ " a",
+ "^^^");
+ testBreakIterator(bi,
+ "aa",
+ "^ ^");
+ testBreakIterator(bi,
+ "aa a",
+ "^ ^^");
+ }
+
+ public void testWithoutSplits() {
+ testWithoutSplits(SPLIT_BI);
+ }
+
+ public void testOnlySingleSplitChar() {
+ testBreakIterator(SPLIT_BI,
+ "|",
+ "^^");
+ }
+
+ public void testSplitThenValue() {
+ testBreakIterator(SPLIT_BI,
+ "|a",
+ "^^^");
+ }
+
+ public void testValueThenSplit() {
+ testBreakIterator(SPLIT_BI,
+ "a|",
+ "^^^");
+ }
+
+ public void testValueThenSplitThenValue() {
+ testBreakIterator(SPLIT_BI,
+ "aa|aa",
+ "^ ^^ ^");
+ }
+
+ public void testValueThenDoubleSplitThenValue() {
+ testBreakIterator(SPLIT_BI,
+ "aa||aa",
+ "^ ^^^ ^");
+ }
+
+ public void testValueThenSplitThenDoubleValueThenSplitThenValue() {
+ testBreakIterator(SPLIT_BI,
+ "a|bb cc|d",
+ "^^^ ^ ^^^");
+ }
+
+ private void testBreakIterator(BreakIterator bi, String text, String boundaries) {
+ bi.setText(text);
+
+ //Test first & last
+ testFirstAndLast(bi, text, boundaries);
+
+ //Test if expected boundaries are consistent with reading them from next() in a loop:
+ assertEquals(boundaries, readBoundariesToString(bi, text));
+
+ //Test following() and preceding():
+    // get each index, randomized in case there is a sequencing bug:
+ List<Integer> indexes = randomIntsBetweenInclusive(text.length() + 1);
+ testFollowing(bi, text, boundaries, indexes);
+ testPreceding(bi, text, boundaries, indexes);
+
+ //Test previous():
+ testPrevious(bi, text, boundaries);
+ }
+
+ private void testFirstAndLast(BreakIterator bi, String text, String boundaries) {
+ String message = "Text: " + text;
+ int current = bi.current();
+ assertEquals(message, boundaries.indexOf('^'), current);
+ assertEquals(message, current, bi.first());
+ assertEquals(message, current, bi.current());
+ current = bi.last();
+ assertEquals(boundaries.lastIndexOf('^'), current);
+ assertEquals(message, current, bi.current());
+ }
+
+ private void testFollowing(BreakIterator bi, String text, String boundaries, List<Integer> indexes) {
+ String message = "Text: " + text;
+ for (Integer index : indexes) {
+ int got = bi.following(index);
+ if (index == boundaries.length()) {
+ assertEquals(message, BreakIterator.DONE, got);
+ assertEquals(boundaries.lastIndexOf('^'), bi.current());
+ continue;
+ }
+ assertEquals(message + " index:" + index, boundaries.indexOf('^', index + 1), got);
+ }
+ }
+
+ private void testPreceding(BreakIterator bi, String text, String boundaries, List<Integer> indexes) {
+ String message = "Text: " + text;
+ for (Integer index : indexes) {
+ int got = bi.preceding(index);
+ if (index == 0) {
+ assertEquals(message, BreakIterator.DONE, got);
+ assertEquals(boundaries.indexOf('^'), bi.current());
+ continue;
+ }
+// if (index == text.length() && got == BreakIterator.DONE) {
+// continue;//hack to accept faulty default impl of BreakIterator.preceding()
+// }
+ assertEquals(message + " index:" + index, boundaries.lastIndexOf('^', index - 1), got);
+ }
+ }
+
+ private List<Integer> randomIntsBetweenInclusive(int end) {
+ List<Integer> indexes = new ArrayList<>(end);
+ for (int i = 0; i < end; i++) {
+ indexes.add(i);
+ }
+ Collections.shuffle(indexes, random());
+ return indexes;
+ }
+
+ private void testPrevious(BreakIterator bi, String text, String boundaries) {
+ String message = "Text: " + text;
+
+ bi.setText(text);
+ int idx = bi.last();//position at the end
+ while (true) {
+ idx = boundaries.lastIndexOf('^', idx - 1);
+ if (idx == -1) {
+ assertEquals(message, BreakIterator.DONE, bi.previous());
+ break;
+ }
+ assertEquals(message, idx, bi.previous());
+ }
+ assertEquals(message, boundaries.indexOf('^'), bi.current());//finishes at first
+ }
+
+ /**
+   * Returns a string consisting of spaces, with '^' only at the boundaries.
+ */
+ private String readBoundariesToString(BreakIterator bi, String text) {
+ // init markers to spaces
+ StringBuilder markers = new StringBuilder();
+ markers.setLength(text.length() + 1);
+ for (int k = 0; k < markers.length(); k++) {
+ markers.setCharAt(k, ' ');
+ }
+
+ bi.setText(text);
+ for (int boundary = bi.current(); boundary != BreakIterator.DONE; boundary = bi.next()) {
+ markers.setCharAt(boundary, '^');
+ }
+ return markers.toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/722e8271/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
new file mode 100644
index 0000000..0fd7d3d
--- /dev/null
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -0,0 +1,962 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.uhighlight;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.text.BreakIterator;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.junit.After;
+import org.junit.Before;
+
+@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
+@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
+public class TestUnifiedHighlighter extends LuceneTestCase {
+
+ private final FieldType fieldType; // for "body" generally, but not necessarily others. See constructor
+
+ private MockAnalyzer indexAnalyzer;
+ private Directory dir;
+
+ @ParametersFactory
+ public static Iterable<Object[]> parameters() {
+ return UHTestHelper.parametersFactoryList();
+ }
+
+ public TestUnifiedHighlighter(FieldType fieldType) {
+ this.fieldType = fieldType;
+ }
+
+ @Before
+ public void doBefore() throws IOException {
+ indexAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);//whitespace, punctuation, lowercase
+ dir = newDirectory();
+ }
+
+ @After
+ public void doAfter() throws IOException {
+ dir.close();
+ }
+
+ //
+ // Tests below were ported from the PostingsHighlighter. Possibly augmented. Far below are newer tests.
+ //
+
+ public void testBasics() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ iw.addDocument(doc);
+ body.setStringValue("Highlighting the first term. Hope it works.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
+ assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
+
+ ir.close();
+ }
+
+ public void testFormatWithMatchExceedingContentLength2() throws Exception {
+
+ String bodyText = "123 TEST 01234 TEST";
+
+ String[] snippets = formatWithMatchExceedingContentLength(bodyText);
+
+ assertEquals(1, snippets.length);
+ assertEquals("123 <b>TEST</b> 01234 TE", snippets[0]);
+ }
+
+ public void testFormatWithMatchExceedingContentLength3() throws Exception {
+
+ String bodyText = "123 5678 01234 TEST TEST";
+
+ String[] snippets = formatWithMatchExceedingContentLength(bodyText);
+
+ assertEquals(1, snippets.length);
+ assertEquals("123 5678 01234 TE", snippets[0]);
+ }
+
+ public void testFormatWithMatchExceedingContentLength() throws Exception {
+
+ String bodyText = "123 5678 01234 TEST";
+
+ String[] snippets = formatWithMatchExceedingContentLength(bodyText);
+
+ assertEquals(1, snippets.length);
+ // LUCENE-5166: no snippet
+ assertEquals("123 5678 01234 TE", snippets[0]);
+ }
+
+ private String[] formatWithMatchExceedingContentLength(String bodyText) throws IOException {
+
+ int maxLength = 17;
+
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ final Field body = new Field("body", bodyText, fieldType);
+
+ Document doc = new Document();
+ doc.add(body);
+
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+
+ Query query = new TermQuery(new Term("body", "test"));
+
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ highlighter.setMaxLength(maxLength);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+
+
+ ir.close();
+ return snippets;
+ }
+
+ // simple test highlighting last word.
+ public void testHighlightLastWord() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new TermQuery(new Term("body", "test"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(1, snippets.length);
+ assertEquals("This is a <b>test</b>", snippets[0]);
+
+ ir.close();
+ }
+
+ // simple test with one sentence documents.
+ public void testOneSentence() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new TermQuery(new Term("body", "test"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // simple test with multiple values that make a result longer than maxLength.
+ public void testMaxLengthWithMultivalue() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+
+ final String value = "This is a multivalued field. Sentencetwo field.";
+ doc.add(new Field("body", value, fieldType));
+ doc.add(new Field("body", value, fieldType));
+ doc.add(new Field("body", value, fieldType));
+
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ highlighter.setMaxLength(value.length() * 2 + 1);
+ Query query = new TermQuery(new Term("body", "field"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 10);
+ assertEquals(1, snippets.length);
+ String highlightedValue = "This is a multivalued <b>field</b>. Sentencetwo <b>field</b>.";
+ assertEquals(highlightedValue + "... " + highlightedValue, snippets[0]);
+
+ ir.close();
+ }
+
+ public void testMultipleFields() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Field title = new Field("title", "", UHTestHelper.randomFieldType(random()));
+ Document doc = new Document();
+ doc.add(body);
+ doc.add(title);
+
+ body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ title.setStringValue("I am hoping for the best.");
+ iw.addDocument(doc);
+ body.setStringValue("Highlighting the first term. Hope it works.");
+ title.setStringValue("But best may not be good enough.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("title", "best")), BooleanClause.Occur.SHOULD)
+ .build();
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ Map<String, String[]> snippets = highlighter.highlightFields(new String[]{"body", "title"}, query, topDocs);
+ assertEquals(2, snippets.size());
+ assertEquals("Just a test <b>highlighting</b> from postings. ", snippets.get("body")[0]);
+ assertEquals("<b>Highlighting</b> the first term. ", snippets.get("body")[1]);
+ assertEquals("I am hoping for the <b>best</b>.", snippets.get("title")[0]);
+ assertEquals("But <b>best</b> may not be good enough.", snippets.get("title")[1]);
+ ir.close();
+ }
+
+ public void testMultipleTerms() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ iw.addDocument(doc);
+ body.setStringValue("Highlighting the first term. Hope it works.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD)
+ .build();
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
+ assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
+
+ ir.close();
+ }
+
+ public void testMultiplePassages() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ iw.addDocument(doc);
+ body.setStringValue("This test is another test. Not a good sentence. Test test test test.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new TermQuery(new Term("body", "test"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", snippets[0]);
+ assertEquals("This <b>test</b> is another <b>test</b>. ... <b>Test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[1]);
+
+ ir.close();
+ }
+
+ public void testBuddhism() throws Exception {
+ String text = "This eight-volume set brings together seminal papers in Buddhist studies from a vast " +
+ "range of academic disciplines published over the last forty years. With a new introduction " +
+ "by the editor, this collection is a unique and unrivalled research resource for both " +
+ "student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in " +
+ "South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine " +
+ "- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma " +
+ "and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric " +
+ "Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and " +
+ "Southeast Asia, and - Buddhism in China, East Asia, and Japan.";
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", text, fieldType);
+ Document document = new Document();
+ document.add(body);
+ iw.addDocument(document);
+ IndexReader ir = iw.getReader();
+ iw.close();
+ IndexSearcher searcher = newSearcher(ir);
+ PhraseQuery query = new PhraseQuery.Builder()
+ .add(new Term("body", "buddhist"))
+ .add(new Term("body", "origins"))
+ .build();
+ TopDocs topDocs = searcher.search(query, 10);
+ assertEquals(1, topDocs.totalHits);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ highlighter.setHighlightPhrasesStrictly(false);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);
+ assertEquals(1, snippets.length);
+ assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
+ ir.close();
+ }
+
+ public void testCuriousGeorge() throws Exception {
+ String text = "It\u2019s the formula for success for preschoolers\u2014Curious George and fire trucks! " +
+ "Curious George and the Firefighters is a story based on H. A. and Margret Rey\u2019s " +
+ "popular primate and painted in the original watercolor and charcoal style. " +
+ "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", text, fieldType);
+ Document document = new Document();
+ document.add(body);
+ iw.addDocument(document);
+ IndexReader ir = iw.getReader();
+ iw.close();
+ IndexSearcher searcher = newSearcher(ir);
+ PhraseQuery query = new PhraseQuery.Builder()
+ .add(new Term("body", "curious"))
+ .add(new Term("body", "george"))
+ .build();
+ TopDocs topDocs = searcher.search(query, 10);
+ assertEquals(1, topDocs.totalHits);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ highlighter.setHighlightPhrasesStrictly(false);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);
+ assertEquals(1, snippets.length);
+ assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
+ ir.close();
+ }
+
+ public void testCambridgeMA() throws Exception {
+ BufferedReader r = new BufferedReader(new InputStreamReader(
+ this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8));
+ String text = r.readLine();
+ r.close();
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+ Field body = new Field("body", text, fieldType);
+ Document document = new Document();
+ document.add(body);
+ iw.addDocument(document);
+ IndexReader ir = iw.getReader();
+ iw.close();
+ IndexSearcher searcher = newSearcher(ir);
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD)
+ .build();
+ TopDocs topDocs = searcher.search(query, 10);
+ assertEquals(1, topDocs.totalHits);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ highlighter.setMaxLength(Integer.MAX_VALUE - 1);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);
+ assertEquals(1, snippets.length);
+ assertTrue(snippets[0].contains("<b>Square</b>"));
+ assertTrue(snippets[0].contains("<b>Porter</b>"));
+ ir.close();
+ }
+
+ public void testPassageRanking() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new TermQuery(new Term("body", "test"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);
+ assertEquals(1, snippets.length);
+ assertEquals("This is a <b>test</b>. ... Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
+
+ ir.close();
+ }
+
+ public void testBooleanMustNot() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "This sentence has both terms. This sentence has only terms.", fieldType);
+ Document document = new Document();
+ document.add(body);
+ iw.addDocument(document);
+ IndexReader ir = iw.getReader();
+ iw.close();
+ IndexSearcher searcher = newSearcher(ir);
+
+ BooleanQuery query2 = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT)
+ .build();
+
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD)
+ .add(query2, BooleanClause.Occur.SHOULD)
+ .build();
+
+ TopDocs topDocs = searcher.search(query, 10);
+ assertEquals(1, topDocs.totalHits);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ highlighter.setMaxLength(Integer.MAX_VALUE - 1);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);
+ assertEquals(1, snippets.length);
+ assertFalse(snippets[0].contains("<b>both</b>"));
+ ir.close();
+ }
+
+ public void testHighlightAllText() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+ @Override
+ protected BreakIterator getBreakIterator(String field) {
+ return new WholeBreakIterator();
+ }
+ };
+ highlighter.setMaxLength(10000);
+ Query query = new TermQuery(new Term("body", "test"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);
+ assertEquals(1, snippets.length);
+ assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
+
+ ir.close();
+ }
+
+ public void testSpecificDocIDs() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ iw.addDocument(doc);
+ body.setStringValue("Highlighting the first term. Hope it works.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ ScoreDoc[] hits = topDocs.scoreDocs;
+ int[] docIDs = new int[2];
+ docIDs[0] = hits[0].doc;
+ docIDs[1] = hits[1].doc;
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{1}).get("body");
+ assertEquals(2, snippets.length);
+ assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
+ assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
+
+ ir.close();
+ }
+
+ public void testCustomFieldValueSource() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+
+ final String text = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
+ Field body = new Field("body", text, fieldType);
+ doc.add(body);
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+ @Override
+ protected List<CharSequence[]> loadFieldValues(String[] fields,
+ DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
+ assert fields.length == 1;
+ assert docIter.cost() == 1;
+ docIter.nextDoc();
+ return Collections.singletonList(new CharSequence[]{text});
+ }
+
+ @Override
+ protected BreakIterator getBreakIterator(String field) {
+ return new WholeBreakIterator();
+ }
+ };
+
+ Query query = new TermQuery(new Term("body", "test"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);
+ assertEquals(1, snippets.length);
+ assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
+
+ ir.close();
+ }
+
+ /**
+ * Make sure highlighter returns first N sentences if
+   * there are no hits.
+ */
+ public void testEmptyHighlights() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+
+ Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", fieldType);
+ doc.add(body);
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ // "highlighting" never occurs in the document, so there is nothing to highlight
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ int[] docIDs = new int[]{0};
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
+ assertEquals(1, snippets.length);
+ // with no hits, the first maxPassages (2) sentences come back un-highlighted
+ assertEquals("test this is. another sentence this test has. ", snippets[0]);
+
+ ir.close();
+ }
+
+ /**
+ * Not empty but nothing analyzes. Ensures we address null term-vectors.
+ */
+ public void testNothingAnalyzes() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+ doc.add(new Field("body", " ", fieldType));// just a space! (thus not empty)
+ doc.add(newTextField("id", "id", Field.Store.YES));
+ iw.addDocument(doc);
+
+ doc = new Document();
+ doc.add(new Field("body", "something", fieldType));
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ // locate the whitespace-only doc via its "id" field (doc order may be shuffled)
+ int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
+
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ int[] docIDs = new int[1];
+ docIDs[0] = docID;
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
+ assertEquals(1, snippets.length);
+ // the raw stored value is returned unchanged since nothing analyzed to terms
+ assertEquals(" ", snippets[0]);
+
+ ir.close();
+ }
+
+ /**
+ * Make sure we can customize how an empty
+ * highlight is returned.
+ */
+ public void testCustomEmptyHighlights() throws Exception {
+ indexAnalyzer.setPositionIncrementGap(10);
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+
+ Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", fieldType);
+ doc.add(body);
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ highlighter.setMaxNoHighlightPassages(0);// don't want any default summary
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ int[] docIDs = new int[]{0};
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
+ assertEquals(1, snippets.length);
+ // no hits + no fallback passages configured => a null snippet, not a summary
+ assertNull(snippets[0]);
+
+ ir.close();
+ }
+
+ /**
+ * Make sure highlighter returns whole text when there
+ * are no hits and BreakIterator is null.
+ */
+ public void testEmptyHighlightsWhole() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+
+ Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", fieldType);
+ doc.add(body);
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+ @Override
+ protected BreakIterator getBreakIterator(String field) {
+ // treat the entire field value as a single passage
+ return new WholeBreakIterator();
+ }
+ };
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ int[] docIDs = new int[]{0};
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
+ assertEquals(1, snippets.length);
+ // no hits + whole-field break iterator => the full original text is returned
+ assertEquals("test this is. another sentence this test has. far away is that planet.", snippets[0]);
+
+ ir.close();
+ }
+
+ /**
+ * Make sure highlighter is OK with entirely missing
+ * field.
+ */
+ public void testFieldIsMissing() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+
+ Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", fieldType);
+ doc.add(body);
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ // query and highlight a field ("bogus") that no document in the index contains
+ Query query = new TermQuery(new Term("bogus", "highlighting"));
+ int[] docIDs = new int[]{0};
+ String snippets[] = highlighter.highlightFields(new String[]{"bogus"}, query, docIDs, new int[]{2}).get("bogus");
+ assertEquals(1, snippets.length);
+ // a missing field yields a null snippet rather than throwing
+ assertNull(snippets[0]);
+
+ ir.close();
+ }
+
+ // Field holds a lone space: highlighter should echo the stored value back.
+ public void testFieldIsJustSpace() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+ doc.add(new Field("body", " ", fieldType));
+ doc.add(newTextField("id", "id", Field.Store.YES));
+ iw.addDocument(doc);
+
+ doc = new Document();
+ doc.add(new Field("body", "something", fieldType));
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ // find the space-only doc via its "id" field
+ int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
+
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ int[] docIDs = new int[1];
+ docIDs[0] = docID;
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
+ assertEquals(1, snippets.length);
+ assertEquals(" ", snippets[0]);
+
+ ir.close();
+ }
+
+ // Field holds the empty string: highlighter should yield null, not "".
+ public void testFieldIsEmptyString() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+ doc.add(new Field("body", "", fieldType));
+ doc.add(newTextField("id", "id", Field.Store.YES));
+ iw.addDocument(doc);
+
+ doc = new Document();
+ doc.add(new Field("body", "something", fieldType));
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ // find the empty-body doc via its "id" field
+ int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
+
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ int[] docIDs = new int[1];
+ docIDs[0] = docID;
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
+ assertEquals(1, snippets.length);
+ assertNull(snippets[0]);
+
+ ir.close();
+ }
+
+ // Highlights many docs at once, exercising the field-value cache across segments.
+ public void testMultipleDocs() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ int numDocs = atLeast(100);
+ for (int i = 0; i < numDocs; i++) {
+ Document doc = new Document();
+ String content = "the answer is " + i;
+ if ((i & 1) == 0) {
+ content += " some more terms";
+ }
+ doc.add(new Field("body", content, fieldType));
+ doc.add(newStringField("id", "" + i, Field.Store.YES));
+ iw.addDocument(doc);
+
+ if (random().nextInt(10) == 2) {
+ iw.commit();
+ }
+ }
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ highlighter.setCacheFieldValCharsThreshold(random().nextInt(10) * 10);// 0 thru 90 intervals of 10
+ Query query = new TermQuery(new Term("body", "answer"));
+ TopDocs hits = searcher.search(query, numDocs);
+ assertEquals(numDocs, hits.totalHits);
+
+ String snippets[] = highlighter.highlight("body", query, hits);
+ assertEquals(numDocs, snippets.length);
+ for (int hit = 0; hit < numDocs; hit++) {
+ // reconstruct each doc's expected snippet from its stored "id"
+ Document doc = searcher.doc(hits.scoreDocs[hit].doc);
+ int id = Integer.parseInt(doc.get("id"));
+ String expected = "the <b>answer</b> is " + id;
+ if ((id & 1) == 0) {
+ expected += " some more terms";
+ }
+ assertEquals(expected, snippets[hit]);
+ }
+
+ ir.close();
+ }
+
+ // Requests a different maxPassages per field (1 for title, 2 for body) in one call.
+ public void testMultipleSnippetSizes() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Field title = new Field("title", "", UHTestHelper.randomFieldType(random()));
+ Document doc = new Document();
+ doc.add(body);
+ doc.add(title);
+
+ body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ title.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD)
+ .add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD)
+ .build();
+ // maxPassages {1, 2} pairs positionally with fields {"title", "body"}
+ Map<String, String[]> snippets = highlighter.highlightFields(new String[]{"title", "body"}, query, new int[]{0}, new int[]{1, 2});
+ String titleHighlight = snippets.get("title")[0];
+ String bodyHighlight = snippets.get("body")[0];
+ assertEquals("This is a <b>test</b>. ", titleHighlight);
+ assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodyHighlight);
+ ir.close();
+ }
+
+ // Verifies HTML escaping: pre-existing markup in the stored text must be encoded.
+ public void testEncode() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+ @Override
+ protected PassageFormatter getFormatter(String field) {
+ // last arg 'true' turns on escaping of the passage content
+ return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
+ }
+ };
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(1, snippets.length);
+ // the original <i> tags are entity-escaped; only the injected <b> tags are literal
+ assertEquals("Just a test <b>highlighting</b> from &lt;i&gt;postings&lt;/i&gt;. ", snippets[0]);
+
+ ir.close();
+ }
+
+ // LUCENE-4906
+ public void testObjectFormatter() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+ @Override
+ protected PassageFormatter getFormatter(String field) {
+ return new PassageFormatter() {
+ PassageFormatter defaultFormatter = new DefaultPassageFormatter();
+
+ @Override
+ public String[] format(Passage passages[], String content) {
+ // Just turns the String snippet into a length 2
+ // array of String
+ return new String[]{"blah blah", defaultFormatter.format(passages, content).toString()};
+ }
+ };
+ }
+ };
+
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ int[] docIDs = new int[1];
+ docIDs[0] = topDocs.scoreDocs[0].doc;
+ Map<String, Object[]> snippets = highlighter.highlightFieldsAsObjects(new String[]{"body"}, query, docIDs, new int[]{1});
+ Object[] bodySnippets = snippets.get("body");
+ assertEquals(1, bodySnippets.length);
+ assertTrue(Arrays.equals(new String[]{"blah blah", "Just a test <b>highlighting</b> from postings. "}, (String[]) bodySnippets[0]));
+
+ ir.close();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/722e8271/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
new file mode 100644
index 0000000..63f0bb1
--- /dev/null
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -0,0 +1,936 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.uhighlight;
+
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.BaseDirectoryWrapper;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.junit.After;
+import org.junit.Before;
+
+/**
+ * Some tests that highlight wildcard, fuzzy, etc queries.
+ */
+@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
+@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
+public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
+
+ final FieldType fieldType;
+
+ BaseDirectoryWrapper dir;
+ Analyzer indexAnalyzer;
+
+ // Runs the whole suite once per candidate field type (postings, term vectors, etc.).
+ @ParametersFactory
+ public static Iterable<Object[]> parameters() {
+ return UHTestHelper.parametersFactoryList();
+ }
+
+ // fieldType is injected by the @ParametersFactory above.
+ public TestUnifiedHighlighterMTQ(FieldType fieldType) {
+ this.fieldType = fieldType;
+ }
+
+ // Fresh directory and analyzer per test.
+ @Before
+ public void doBefore() throws IOException {
+ dir = newDirectory();
+ indexAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);//whitespace, punctuation, lowercase
+ }
+
+ // Release the directory (also triggers leak checks in the test framework).
+ @After
+ public void doAfter() throws IOException {
+ dir.close();
+ }
+
+ // WildcardQuery highlighting, plus the off switch and a wrong-field clause.
+ public void testWildcards() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new WildcardQuery(new Term("body", "te*"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // disable MTQ; won't highlight
+ highlighter.setHandleMultiTermQuery(false);
+ snippets = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a test.", snippets[0]);
+ assertEquals("Test a one sentence document.", snippets[1]);
+ highlighter.setHandleMultiTermQuery(true);//reset
+
+ // wrong field
+ BooleanQuery bq = new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
+ .add(new WildcardQuery(new Term("bogus", "te*")), BooleanClause.Occur.SHOULD)
+ .build();
+ topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", bq, topDocs);
+ assertEquals(2, snippets.length);
+ // the wildcard targets "bogus", so nothing in "body" gets marked
+ assertEquals("This is a test.", snippets[0]);
+ assertEquals("Test a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // PrefixQuery highlighting; also checks a prefix against the wrong field is ignored.
+ public void testOnePrefix() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new PrefixQuery(new Term("body", "te"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // wrong field
+ BooleanQuery bq = new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
+ .add(new PrefixQuery(new Term("bogus", "te")), BooleanClause.Occur.SHOULD)
+ .build();
+ topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", bq, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a test.", snippets[0]);
+ assertEquals("Test a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // RegexpQuery highlighting; also checks a regexp against the wrong field is ignored.
+ public void testOneRegexp() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new RegexpQuery(new Term("body", "te.*"));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // wrong field
+ BooleanQuery bq = new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
+ .add(new RegexpQuery(new Term("bogus", "te.*")), BooleanClause.Occur.SHOULD)
+ .build();
+ topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", bq, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a test.", snippets[0]);
+ assertEquals("Test a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // FuzzyQuery highlighting: edit distance 1, with and without a required prefix.
+ public void testOneFuzzy() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ // "tets" is one transposition away from "test"
+ Query query = new FuzzyQuery(new Term("body", "tets"), 1);
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // with prefix
+ query = new FuzzyQuery(new Term("body", "tets"), 1, 2);
+ topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // wrong field
+ BooleanQuery bq = new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
+ .add(new FuzzyQuery(new Term("bogus", "tets"), 1), BooleanClause.Occur.SHOULD)
+ .build();
+ topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", bq, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a test.", snippets[0]);
+ assertEquals("Test a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // TermRangeQuery highlighting across open/closed bounds, inclusive/exclusive
+ // endpoints, and a range on the wrong field.
+ public void testRanges() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = TermRangeQuery.newStringRange("body", "ta", "tf", true, true);
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // null start
+ query = TermRangeQuery.newStringRange("body", null, "tf", true, true);
+ topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ // open lower bound matches every term <= "tf", so most words get marked
+ assertEquals("This <b>is</b> <b>a</b> <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> <b>a</b> <b>one</b> <b>sentence</b> <b>document</b>.", snippets[1]);
+
+ // null end
+ query = TermRangeQuery.newStringRange("body", "ta", null, true, true);
+ topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("<b>This</b> is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // exact start inclusive
+ query = TermRangeQuery.newStringRange("body", "test", "tf", true, true);
+ topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // exact end inclusive
+ query = TermRangeQuery.newStringRange("body", "ta", "test", true, true);
+ topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // exact start exclusive
+ BooleanQuery bq = new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
+ .add(TermRangeQuery.newStringRange("body", "test", "tf", false, true), BooleanClause.Occur.SHOULD)
+ .build();
+ topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", bq, topDocs);
+ assertEquals(2, snippets.length);
+ // "test" is excluded from the range, so nothing is highlighted
+ assertEquals("This is a test.", snippets[0]);
+ assertEquals("Test a one sentence document.", snippets[1]);
+
+ // exact end exclusive
+ bq = new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
+ .add(TermRangeQuery.newStringRange("body", "ta", "test", true, false), BooleanClause.Occur.SHOULD)
+ .build();
+ topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", bq, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a test.", snippets[0]);
+ assertEquals("Test a one sentence document.", snippets[1]);
+
+ // wrong field
+ bq = new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
+ .add(TermRangeQuery.newStringRange("bogus", "ta", "tf", true, true), BooleanClause.Occur.SHOULD)
+ .build();
+ topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", bq, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a test.", snippets[0]);
+ assertEquals("Test a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // Wildcard nested in a BooleanQuery, including a MUST_NOT clause that must not highlight.
+ public void testWildcardInBoolean() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.SHOULD)
+ .build();
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ // must not
+ query = new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
+ .add(new WildcardQuery(new Term("bogus", "te*")), BooleanClause.Occur.MUST_NOT)
+ .build();
+ topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ snippets = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a test.", snippets[0]);
+ assertEquals("Test a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // Wildcard combined with a FILTER clause still highlights the MUST wildcard terms.
+ public void testWildcardInFiltered() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.MUST)
+ .add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.FILTER)
+ .build();
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // Wildcard wrapped in ConstantScoreQuery is still unwrapped and highlighted.
+ public void testWildcardInConstantScore() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ ConstantScoreQuery query = new ConstantScoreQuery(new WildcardQuery(new Term("body", "te*")));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // Wildcard inside a DisjunctionMaxQuery is still unwrapped and highlighted.
+ public void testWildcardInDisjunctionMax() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ DisjunctionMaxQuery query = new DisjunctionMaxQuery(
+ Collections.singleton(new WildcardQuery(new Term("body", "te*"))), 0);
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ // Wildcard wrapped in SpanMultiTermQueryWrapper is highlighted like the plain wildcard.
+ public void testSpanWildcard() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ /** A span-wrapped wildcard nested inside a SpanOrQuery must still be highlighted. */
+ public void testSpanOr() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
+ Query query = new SpanOrQuery(new SpanQuery[]{childQuery});
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ /** A span-wrapped wildcard nested inside a SpanNearQuery must still be highlighted. */
+ public void testSpanNear() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
+ // the same clause twice (slop 0, unordered) so the near query can match on a single term position
+ Query query = new SpanNearQuery(new SpanQuery[]{childQuery, childQuery}, 0, false);
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ /** A span-wrapped wildcard as the include clause of a SpanNotQuery must still be highlighted. */
+ public void testSpanNot() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ SpanQuery include = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
+ // "bogus" never occurs, so the exclude clause removes nothing
+ SpanQuery exclude = new SpanTermQuery(new Term("body", "bogus"));
+ Query query = new SpanNotQuery(include, exclude);
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ /** A span-wrapped wildcard under a position-check query (SpanFirstQuery) must still be highlighted. */
+ public void testSpanPositionCheck() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test.");
+ iw.addDocument(doc);
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
+ // the huge end bound means the position check effectively always passes
+ Query query = new SpanFirstQuery(childQuery, 1000000);
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(2, snippets.length);
+ assertEquals("This is a <b>test</b>.", snippets[0]);
+ assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
+
+ ir.close();
+ }
+
+ /**
+ * Runs a query with two MTQs and confirms the formatter
+ * can tell which query matched which hit.
+ */
+ public void testWhichMTQMatched() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("Test a one sentence document.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.SHOULD)
+ .add(new WildcardQuery(new Term("body", "one")), BooleanClause.Occur.SHOULD)
+ .add(new WildcardQuery(new Term("body", "se*")), BooleanClause.Occur.SHOULD)
+ .build();
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, topDocs);
+ assertEquals(1, snippets.length);
+
+ // Default formatter just bolds each hit:
+ assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);
+
+ // Now use our own formatter, that also stuffs the
+ // matching term's text into the result:
+ highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
+
+ @Override
+ protected PassageFormatter getFormatter(String field) {
+ return new PassageFormatter() {
+
+ @Override
+ public Object format(Passage passages[], String content) {
+ // Copied from DefaultPassageFormatter, but
+ // tweaked to include the matched term:
+ StringBuilder sb = new StringBuilder();
+ int pos = 0;
+ for (Passage passage : passages) {
+ // don't add ellipsis if its the first one, or if its connected.
+ if (passage.startOffset > pos && pos > 0) {
+ sb.append("... ");
+ }
+ pos = passage.startOffset;
+ for (int i = 0; i < passage.numMatches; i++) {
+ int start = passage.matchStarts[i];
+ int end = passage.matchEnds[i];
+ // its possible to have overlapping terms
+ if (start > pos) {
+ sb.append(content, pos, start);
+ }
+ if (end > pos) {
+ sb.append("<b>");
+ sb.append(content, Math.max(pos, start), end);
+ // append the query term that produced this match, e.g. "(body:te*)"
+ sb.append('(');
+ sb.append(passage.getMatchTerms()[i].utf8ToString());
+ sb.append(')');
+ sb.append("</b>");
+ pos = end;
+ }
+ }
+ // its possible a "term" from the analyzer could span a sentence boundary.
+ sb.append(content, pos, Math.max(pos, passage.endOffset));
+ pos = passage.endOffset;
+ }
+ return sb.toString();
+ }
+ };
+ }
+ };
+
+ assertEquals(1, topDocs.totalHits);
+ snippets = highlighter.highlight("body", query, topDocs);
+ assertEquals(1, snippets.length);
+
+ // Custom formatter appends the matching query term after each bolded hit:
+ assertEquals("<b>Test(body:te*)</b> a <b>one(body:one)</b> <b>sentence(body:se*)</b> document.", snippets[0]);
+
+ ir.close();
+ }
+
+
+ //
+ // All tests below were *not* ported from the PostingsHighlighter; they are new to the U.H.
+ //
+
+ /** setMaxLength truncates highlighting: only content within the limit is considered/returned. */
+ public void testWithMaxLen() throws IOException {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("Alpha Bravo foo foo foo. Foo foo Alpha Bravo");//44 char long, 2 sentences
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ highlighter.setMaxLength(25);//a little past first sentence
+
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST)
+ .add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST)
+ .build();
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);//ask for 2 but we'll only get 1
+ assertArrayEquals(
+ new String[]{"<b>Alpha</b> <b>Bravo</b> foo foo foo. "}, snippets
+ );
+
+ ir.close();
+ }
+
+ /**
+ * Ensures the highlighter closes the TokenStream even when analysis throws, so the
+ * Analyzer can be reused afterwards without an IllegalStateException.
+ */
+ public void testTokenStreamIsClosed() throws IOException {
+ // note: test is a derivative of testWithMaxLen()
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Field body = new Field("body", "", fieldType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("Alpha Bravo foo foo foo. Foo foo Alpha Bravo");
+ if (random().nextBoolean()) { // sometimes add a 2nd value (maybe matters?)
+ doc.add(new Field("body", "2nd value Alpha Bravo", fieldType));
+ }
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ // use this buggy Analyzer at highlight time
+ Analyzer buggyAnalyzer = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer buggyTokenizer = new Tokenizer() {
+ @Override
+ public boolean incrementToken() throws IOException {
+ throw new IOException("EXPECTED");
+ }
+ };
+ return new TokenStreamComponents(buggyTokenizer);
+ }
+ };
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, buggyAnalyzer);
+ highlighter.setHandleMultiTermQuery(true);
+ if (rarely()) {
+ highlighter.setMaxLength(25);//a little past first sentence
+ }
+
+ // randomize the query shape; at least one clause is always present
+ boolean hasClauses = false;
+ BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+ if (random().nextBoolean()) {
+ hasClauses = true;
+ queryBuilder.add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST);
+ }
+ if (!hasClauses || random().nextBoolean()) {
+ queryBuilder.add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST);
+ }
+ BooleanQuery query = queryBuilder.build();
+ TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+ try {
+ String snippets[] = highlighter.highlight("body", query, topDocs, 2);
+ // don't even care what the results are; just want to test exception behavior
+ // the buggy analyzer is only exercised when the field type forces re-analysis
+ if (fieldType == UHTestHelper.reanalysisType) {
+ fail("Expecting EXPECTED IOException");
+ }
+ } catch (IOException e) {
+ if (!e.getMessage().equals("EXPECTED")) {
+ throw e;
+ }
+ }
+ ir.close();
+
+ // Now test we can get the tokenStream without it puking due to IllegalStateException for not calling close()
+
+ try (TokenStream ts = buggyAnalyzer.tokenStream("body", "anything")) {
+ ts.reset();// hopefully doesn't throw
+ // don't call incrementToken; we know it's buggy ;-)
+ }
+ }
+
+ /**
+ * Not empty but nothing analyzes. Ensures we address null term-vectors.
+ */
+ public void testNothingAnalyzes() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+ doc.add(new Field("body", " ", fieldType));// just a space! (thus not empty)
+ doc.add(newTextField("id", "id", Field.Store.YES));
+ iw.addDocument(doc);
+
+ doc = new Document();
+ doc.add(new Field("body", "something", fieldType));
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
+
+ // prefix matches nothing; we still expect the (un-highlighted) field content back
+ Query query = new PrefixQuery(new Term("body", "nonexistent"));
+ int[] docIDs = new int[1];
+ docIDs[0] = docID;
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
+ assertEquals(1, snippets.length);
+ assertEquals(" ", snippets[0]);
+
+ ir.close();
+ }
+
+ /** Highlights across two segments; guards against mixing up global vs. leaf doc IDs. */
+ public void testMultiSegment() throws Exception {
+ // If we incorrectly got the term vector from mis-matched global/leaf doc ID, this test may fail
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+ Document doc = new Document();
+ doc.add(new Field("body", "word aberration", fieldType));
+ iw.addDocument(doc);
+
+ iw.commit(); // make segment
+
+ doc = new Document();
+ doc.add(new Field("body", "word absolve", fieldType));
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ Query query = new PrefixQuery(new Term("body", "ab"));
+ TopDocs topDocs = searcher.search(query, 10);
+
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, topDocs).get("body");
+ Arrays.sort(snippets);// hit order isn't guaranteed; sort for a stable assertion
+ assertEquals("[word <b>aberration</b>, word <b>absolve</b>]", Arrays.toString(snippets));
+
+ ir.close();
+ }
+
+ /**
+ * The BooleanQuery requires a phrase ("consent order") that never occurs, so even though the
+ * wildcard clause alone could match terms, nothing should be highlighted — the snippet comes
+ * back as plain field content.
+ */
+ public void testPositionSensitiveWithWildcardDoesNotHighlight() throws Exception {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+ Document doc = new Document();
+ doc.add(new Field("body", "iterate insect ipswitch illinois indirect", fieldType));
+ doc.add(newTextField("id", "id", Field.Store.YES));
+
+ iw.addDocument(doc);
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
+
+ // phrase does not occur in the document
+ PhraseQuery pq = new PhraseQuery.Builder()
+ .add(new Term("body", "consent"))
+ .add(new Term("body", "order"))
+ .build();
+
+ BooleanQuery query = new BooleanQuery.Builder()
+ .add(new WildcardQuery(new Term("body", "enforc*")), BooleanClause.Occur.MUST)
+ .add(pq, BooleanClause.Occur.MUST)
+ .build();
+
+ int[] docIds = new int[]{docID};
+
+ String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
+ assertEquals(1, snippets.length);
+ assertEquals("iterate insect ipswitch illinois indirect", snippets[0]);
+ ir.close();
+ }
+
+}