You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2017/05/25 17:55:18 UTC
[09/44] lucene-solr:jira/solr-8668: LUCENE-7815: Removed the
PostingsHighlighter
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
deleted file mode 100644
index 4b7e66b..0000000
--- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
+++ /dev/null
@@ -1,1185 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.postingshighlight;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.StoredField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queries.CustomScoreQuery;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.text.BreakIterator;
-import java.util.Arrays;
-import java.util.Map;
-
-public class TestPostingsHighlighter extends LuceneTestCase {
-
- public void testBasics() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
- iw.addDocument(doc);
- body.setStringValue("Highlighting the first term. Hope it works.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("body", "highlighting"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(2, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
- assertEquals(2, snippets.length);
- assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
- assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
-
- ir.close();
- dir.close();
- }
-
- public void testFormatWithMatchExceedingContentLength2() throws Exception {
-
- String bodyText = "123 TEST 01234 TEST";
-
- String[] snippets = formatWithMatchExceedingContentLength(bodyText);
-
- assertEquals(1, snippets.length);
- assertEquals("123 <b>TEST</b> 01234 TE", snippets[0]);
- }
-
- public void testFormatWithMatchExceedingContentLength3() throws Exception {
-
- String bodyText = "123 5678 01234 TEST TEST";
-
- String[] snippets = formatWithMatchExceedingContentLength(bodyText);
-
- assertEquals(1, snippets.length);
- assertEquals("123 5678 01234 TE", snippets[0]);
- }
-
- public void testFormatWithMatchExceedingContentLength() throws Exception {
-
- String bodyText = "123 5678 01234 TEST";
-
- String[] snippets = formatWithMatchExceedingContentLength(bodyText);
-
- assertEquals(1, snippets.length);
- // LUCENE-5166: no snippet
- assertEquals("123 5678 01234 TE", snippets[0]);
- }
-
- private String[] formatWithMatchExceedingContentLength(String bodyText) throws IOException {
-
- int maxLength = 17;
-
- final Analyzer analyzer = new MockAnalyzer(random());
-
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- final FieldType fieldType = new FieldType(TextField.TYPE_STORED);
- fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- final Field body = new Field("body", bodyText, fieldType);
-
- Document doc = new Document();
- doc.add(body);
-
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
-
- Query query = new TermQuery(new Term("body", "test"));
-
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
-
- PostingsHighlighter highlighter = new PostingsHighlighter(maxLength);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
-
-
- ir.close();
- dir.close();
- return snippets;
- }
-
- // simple test highlighting last word.
- public void testHighlightLastWord() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("body", "test"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
- assertEquals(1, snippets.length);
- assertEquals("This is a <b>test</b>", snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- // simple test with one sentence documents.
- public void testOneSentence() throws Exception {
- Directory dir = newDirectory();
- // use simpleanalyzer for more natural tokenization (else "test." is a token)
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test.");
- iw.addDocument(doc);
- body.setStringValue("Test a one sentence document.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("body", "test"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(2, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
- assertEquals(2, snippets.length);
- assertEquals("This is a <b>test</b>.", snippets[0]);
- assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
-
- ir.close();
- dir.close();
- }
-
- // simple test with multiple values that make a result longer than maxLength.
- public void testMaxLengthWithMultivalue() throws Exception {
- Directory dir = newDirectory();
- // use simpleanalyzer for more natural tokenization (else "test." is a token)
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Document doc = new Document();
-
- for(int i = 0; i < 3 ; i++) {
- Field body = new Field("body", "", offsetsType);
- body.setStringValue("This is a multivalued field");
- doc.add(body);
- }
-
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(40);
- Query query = new TermQuery(new Term("body", "field"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
- assertEquals(1, snippets.length);
- assertTrue("Snippet should have maximum 40 characters plus the pre and post tags",
- snippets[0].length() == (40 + "<b></b>".length()));
-
- ir.close();
- dir.close();
- }
-
- public void testMultipleFields() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Field title = new Field("title", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
- doc.add(title);
-
- body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
- title.setStringValue("I am hoping for the best.");
- iw.addDocument(doc);
- body.setStringValue("Highlighting the first term. Hope it works.");
- title.setStringValue("But best may not be good enough.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- BooleanQuery.Builder query = new BooleanQuery.Builder();
- query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
- query.add(new TermQuery(new Term("title", "best")), BooleanClause.Occur.SHOULD);
- TopDocs topDocs = searcher.search(query.build(), 10, Sort.INDEXORDER);
- assertEquals(2, topDocs.totalHits);
- Map<String,String[]> snippets = highlighter.highlightFields(new String [] { "body", "title" }, query.build(), searcher, topDocs);
- assertEquals(2, snippets.size());
- assertEquals("Just a test <b>highlighting</b> from postings. ", snippets.get("body")[0]);
- assertEquals("<b>Highlighting</b> the first term. ", snippets.get("body")[1]);
- assertEquals("I am hoping for the <b>best</b>.", snippets.get("title")[0]);
- assertEquals("But <b>best</b> may not be good enough.", snippets.get("title")[1]);
- ir.close();
- dir.close();
- }
-
- public void testMultipleTerms() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
- iw.addDocument(doc);
- body.setStringValue("Highlighting the first term. Hope it works.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- BooleanQuery.Builder query = new BooleanQuery.Builder();
- query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
- query.add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD);
- query.add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD);
- TopDocs topDocs = searcher.search(query.build(), 10, Sort.INDEXORDER);
- assertEquals(2, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs);
- assertEquals(2, snippets.length);
- assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
- assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
-
- ir.close();
- dir.close();
- }
-
- public void testMultiplePassages() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
- iw.addDocument(doc);
- body.setStringValue("This test is another test. Not a good sentence. Test test test test.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("body", "test"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(2, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
- assertEquals(2, snippets.length);
- assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", snippets[0]);
- assertEquals("This <b>test</b> is another <b>test</b>. ... <b>Test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[1]);
-
- ir.close();
- dir.close();
- }
-
- public void testUserFailedToIndexOffsets() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType positionsType = new FieldType(TextField.TYPE_STORED);
- positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
- Field body = new Field("body", "", positionsType);
- Field title = new StringField("title", "", Field.Store.YES);
- Document doc = new Document();
- doc.add(body);
- doc.add(title);
-
- body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
- title.setStringValue("test");
- iw.addDocument(doc);
- body.setStringValue("This test is another test. Not a good sentence. Test test test test.");
- title.setStringValue("test");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("body", "test"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(2, topDocs.totalHits);
- expectThrows(IllegalArgumentException.class, () -> {
- highlighter.highlight("body", query, searcher, topDocs, 2);
- });
-
- expectThrows(IllegalArgumentException.class, () -> {
- highlighter.highlight("title", new TermQuery(new Term("title", "test")), searcher, topDocs, 2);
- fail("did not hit expected exception");
- });
-
- ir.close();
- dir.close();
- }
-
- public void testBuddhism() throws Exception {
- String text = "This eight-volume set brings together seminal papers in Buddhist studies from a vast " +
- "range of academic disciplines published over the last forty years. With a new introduction " +
- "by the editor, this collection is a unique and unrivalled research resource for both " +
- "student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in " +
- "South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine " +
- "- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma " +
- "and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric " +
- "Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and " +
- "Southeast Asia, and - Buddhism in China, East Asia, and Japan.";
- Directory dir = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
-
- FieldType positionsType = new FieldType(TextField.TYPE_STORED);
- positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", text, positionsType);
- Document document = new Document();
- document.add(body);
- iw.addDocument(document);
- IndexReader ir = iw.getReader();
- iw.close();
- IndexSearcher searcher = newSearcher(ir);
- PhraseQuery query = new PhraseQuery("body", "buddhist", "origins");
- TopDocs topDocs = searcher.search(query, 10);
- assertEquals(1, topDocs.totalHits);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
- assertEquals(1, snippets.length);
- assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
- ir.close();
- dir.close();
- }
-
- public void testCuriousGeorge() throws Exception {
- String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " +
- "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
- "popular primate and painted in the original watercolor and charcoal style. " +
- "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
- Directory dir = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
- FieldType positionsType = new FieldType(TextField.TYPE_STORED);
- positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", text, positionsType);
- Document document = new Document();
- document.add(body);
- iw.addDocument(document);
- IndexReader ir = iw.getReader();
- iw.close();
- IndexSearcher searcher = newSearcher(ir);
- PhraseQuery query = new PhraseQuery("body", "curious", "george");
- TopDocs topDocs = searcher.search(query, 10);
- assertEquals(1, topDocs.totalHits);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
- assertEquals(1, snippets.length);
- assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
- ir.close();
- dir.close();
- }
-
- public void testCambridgeMA() throws Exception {
- BufferedReader r = new BufferedReader(new InputStreamReader(
- this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8));
- String text = r.readLine();
- r.close();
- Directory dir = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
- FieldType positionsType = new FieldType(TextField.TYPE_STORED);
- positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", text, positionsType);
- Document document = new Document();
- document.add(body);
- iw.addDocument(document);
- IndexReader ir = iw.getReader();
- iw.close();
- IndexSearcher searcher = newSearcher(ir);
- BooleanQuery.Builder query = new BooleanQuery.Builder();
- query.add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD);
- query.add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD);
- query.add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD);
- TopDocs topDocs = searcher.search(query.build(), 10);
- assertEquals(1, topDocs.totalHits);
- PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
- String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs, 2);
- assertEquals(1, snippets.length);
- assertTrue(snippets[0].contains("<b>Square</b>"));
- assertTrue(snippets[0].contains("<b>Porter</b>"));
- ir.close();
- dir.close();
- }
-
- public void testPassageRanking() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("body", "test"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
- assertEquals(1, snippets.length);
- assertEquals("This is a <b>test</b>. ... Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- public void testBooleanMustNot() throws Exception {
- Directory dir = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
- FieldType positionsType = new FieldType(TextField.TYPE_STORED);
- positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "This sentence has both terms. This sentence has only terms.", positionsType);
- Document document = new Document();
- document.add(body);
- iw.addDocument(document);
- IndexReader ir = iw.getReader();
- iw.close();
- IndexSearcher searcher = newSearcher(ir);
- BooleanQuery.Builder query = new BooleanQuery.Builder();
- query.add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD);
- BooleanQuery.Builder query2 = new BooleanQuery.Builder();
- query.add(query2.build(), BooleanClause.Occur.SHOULD);
- query2.add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT);
- TopDocs topDocs = searcher.search(query.build(), 10);
- assertEquals(1, topDocs.totalHits);
- PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
- String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs, 2);
- assertEquals(1, snippets.length);
- assertFalse(snippets[0].contains("<b>both</b>"));
- ir.close();
- dir.close();
- }
-
- public void testHighlightAllText() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
- @Override
- protected BreakIterator getBreakIterator(String field) {
- return new WholeBreakIterator();
- }
- };
- Query query = new TermQuery(new Term("body", "test"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
- assertEquals(1, snippets.length);
- assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- public void testSpecificDocIDs() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
- iw.addDocument(doc);
- body.setStringValue("Highlighting the first term. Hope it works.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("body", "highlighting"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(2, topDocs.totalHits);
- ScoreDoc[] hits = topDocs.scoreDocs;
- int[] docIDs = new int[2];
- docIDs[0] = hits[0].doc;
- docIDs[1] = hits[1].doc;
- String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 1 }).get("body");
- assertEquals(2, snippets.length);
- assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
- assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
-
- ir.close();
- dir.close();
- }
-
- public void testCustomFieldValueSource() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- Document doc = new Document();
-
- FieldType offsetsType = new FieldType(TextField.TYPE_NOT_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- final String text = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
- Field body = new Field("body", text, offsetsType);
- doc.add(body);
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
-
- PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
- @Override
- protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
- assert fields.length == 1;
- assert docids.length == 1;
- String[][] contents = new String[1][1];
- contents[0][0] = text;
- return contents;
- }
-
- @Override
- protected BreakIterator getBreakIterator(String field) {
- return new WholeBreakIterator();
- }
- };
-
- Query query = new TermQuery(new Term("body", "test"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
- assertEquals(1, snippets.length);
- assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- /** Make sure highlighter returns first N sentences if
- * there were no hits. */
- public void testEmptyHighlights() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Document doc = new Document();
-
- Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", offsetsType);
- doc.add(body);
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("body", "highlighting"));
- int[] docIDs = new int[] {0};
- String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
- assertEquals(1, snippets.length);
- assertEquals("test this is. another sentence this test has. ", snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- /** Make sure highlighter we can customize how emtpy
- * highlight is returned. */
- public void testCustomEmptyHighlights() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Document doc = new Document();
-
- Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", offsetsType);
- doc.add(body);
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter() {
- @Override
- public Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
- return new Passage[0];
- }
- };
- Query query = new TermQuery(new Term("body", "highlighting"));
- int[] docIDs = new int[] {0};
- String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
- assertEquals(1, snippets.length);
- assertNull(snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- /** Make sure highlighter returns whole text when there
- * are no hits and BreakIterator is null. */
- public void testEmptyHighlightsWhole() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Document doc = new Document();
-
- Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", offsetsType);
- doc.add(body);
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
- @Override
- protected BreakIterator getBreakIterator(String field) {
- return new WholeBreakIterator();
- }
- };
- Query query = new TermQuery(new Term("body", "highlighting"));
- int[] docIDs = new int[] {0};
- String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
- assertEquals(1, snippets.length);
- assertEquals("test this is. another sentence this test has. far away is that planet.", snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- /** Make sure highlighter is OK with entirely missing
- * field. */
- public void testFieldIsMissing() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Document doc = new Document();
-
- Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", offsetsType);
- doc.add(body);
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("bogus", "highlighting"));
- int[] docIDs = new int[] {0};
- String snippets[] = highlighter.highlightFields(new String[] {"bogus"}, query, searcher, docIDs, new int[] { 2 }).get("bogus");
- assertEquals(1, snippets.length);
- assertNull(snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- public void testFieldIsJustSpace() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
-
- Document doc = new Document();
- doc.add(new Field("body", " ", offsetsType));
- doc.add(new Field("id", "id", offsetsType));
- iw.addDocument(doc);
-
- doc = new Document();
- doc.add(new Field("body", "something", offsetsType));
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
-
- Query query = new TermQuery(new Term("body", "highlighting"));
- int[] docIDs = new int[1];
- docIDs[0] = docID;
- String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
- assertEquals(1, snippets.length);
- assertEquals(" ", snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- public void testFieldIsEmptyString() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
-
- Document doc = new Document();
- doc.add(new Field("body", "", offsetsType));
- doc.add(new Field("id", "id", offsetsType));
- iw.addDocument(doc);
-
- doc = new Document();
- doc.add(new Field("body", "something", offsetsType));
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
-
- Query query = new TermQuery(new Term("body", "highlighting"));
- int[] docIDs = new int[1];
- docIDs[0] = docID;
- String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
- assertEquals(1, snippets.length);
- assertNull(snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- public void testMultipleDocs() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
-
- int numDocs = atLeast(100);
- for(int i=0;i<numDocs;i++) {
- Document doc = new Document();
- String content = "the answer is " + i;
- if ((i & 1) == 0) {
- content += " some more terms";
- }
- doc.add(new Field("body", content, offsetsType));
- doc.add(newStringField("id", ""+i, Field.Store.YES));
- iw.addDocument(doc);
-
- if (random().nextInt(10) == 2) {
- iw.commit();
- }
- }
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new TermQuery(new Term("body", "answer"));
- TopDocs hits = searcher.search(query, numDocs);
- assertEquals(numDocs, hits.totalHits);
-
- String snippets[] = highlighter.highlight("body", query, searcher, hits);
- assertEquals(numDocs, snippets.length);
- for(int hit=0;hit<numDocs;hit++) {
- Document doc = searcher.doc(hits.scoreDocs[hit].doc);
- int id = Integer.parseInt(doc.get("id"));
- String expected = "the <b>answer</b> is " + id;
- if ((id & 1) == 0) {
- expected += " some more terms";
- }
- assertEquals(expected, snippets[hit]);
- }
-
- ir.close();
- dir.close();
- }
-
- public void testMultipleSnippetSizes() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Field title = new Field("title", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
- doc.add(title);
-
- body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
- title.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- BooleanQuery.Builder query = new BooleanQuery.Builder();
- query.add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD);
- query.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD);
- Map<String,String[]> snippets = highlighter.highlightFields(new String[] { "title", "body" }, query.build(), searcher, new int[] { 0 }, new int[] { 1, 2 });
- String titleHighlight = snippets.get("title")[0];
- String bodyHighlight = snippets.get("body")[0];
- assertEquals("This is a <b>test</b>. ", titleHighlight);
- assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodyHighlight);
- ir.close();
- dir.close();
- }
-
- public void testEncode() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter() {
- @Override
- protected PassageFormatter getFormatter(String field) {
- return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
- }
- };
- Query query = new TermQuery(new Term("body", "highlighting"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
- assertEquals(1, snippets.length);
- assertEquals("Just a test <b>highlighting</b> from <i>postings</i>. ", snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- /** customizing the gap separator to force a sentence break */
- public void testGapSeparator() throws Exception {
- Directory dir = newDirectory();
- // use simpleanalyzer for more natural tokenization (else "test." is a token)
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Document doc = new Document();
-
- Field body1 = new Field("body", "", offsetsType);
- body1.setStringValue("This is a multivalued field");
- doc.add(body1);
-
- Field body2 = new Field("body", "", offsetsType);
- body2.setStringValue("This is something different");
- doc.add(body2);
-
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter() {
- @Override
- protected char getMultiValuedSeparator(String field) {
- assert field.equals("body");
- return '\u2029';
- }
- };
- Query query = new TermQuery(new Term("body", "field"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
- assertEquals(1, snippets.length);
- assertEquals("This is a multivalued <b>field</b>\u2029", snippets[0]);
-
- ir.close();
- dir.close();
- }
-
- // LUCENE-4906
- public void testObjectFormatter() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter() {
- @Override
- protected PassageFormatter getFormatter(String field) {
- return new PassageFormatter() {
- PassageFormatter defaultFormatter = new DefaultPassageFormatter();
-
- @Override
- public String[] format(Passage passages[], String content) {
- // Just turns the String snippet into a length 2
- // array of String
- return new String[] {"blah blah", defaultFormatter.format(passages, content).toString()};
- }
- };
- }
- };
-
- Query query = new TermQuery(new Term("body", "highlighting"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- int[] docIDs = new int[1];
- docIDs[0] = topDocs.scoreDocs[0].doc;
- Map<String,Object[]> snippets = highlighter.highlightFieldsAsObjects(new String[]{"body"}, query, searcher, docIDs, new int[] {1});
- Object[] bodySnippets = snippets.get("body");
- assertEquals(1, bodySnippets.length);
- assertTrue(Arrays.equals(new String[] {"blah blah", "Just a test <b>highlighting</b> from postings. "}, (String[]) bodySnippets[0]));
-
- ir.close();
- dir.close();
- }
-
- public void testFieldSometimesMissingFromSegment() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "foo", offsetsType);
- Document doc = new Document();
- doc.add(body);
- iw.addDocument(doc);
-
- // Make a 2nd segment where body is only stored:
- iw.commit();
- doc = new Document();
- doc.add(new StoredField("body", "foo"));
- iw.addDocument(doc);
-
- IndexReader ir = DirectoryReader.open(iw.w);
- iw.close();
-
- IndexSearcher searcher = new IndexSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter();
- Query query = new MatchAllDocsQuery();
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(2, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
- assertEquals(2, snippets.length);
- assertEquals("foo", snippets[0]);
- assertNull(snippets[1]);
- ir.close();
- dir.close();
- }
-
- public void testCustomScoreQueryHighlight() throws Exception{
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- TermQuery termQuery = new TermQuery(new Term("body", "very"));
- PostingsHighlighter highlighter = new PostingsHighlighter();
- CustomScoreQuery query = new CustomScoreQuery(termQuery);
-
- IndexSearcher searcher = newSearcher(ir);
- TopDocs hits = searcher.search(query, 10);
- assertEquals(1, hits.totalHits);
-
- String snippets[] = highlighter.highlight("body", query, searcher, hits);
- assertEquals(1, snippets.length);
- assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is <b>very</b> long in the middle and finally ends with another reference to Kennedy",
- snippets[0]);
-
- ir.close();
- dir.close();
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
deleted file mode 100644
index 7a693d9..0000000
--- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.postingshighlight;
-
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Random;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-
-public class TestPostingsHighlighterRanking extends LuceneTestCase {
- /**
- * indexes a bunch of gibberish, and then highlights top(n).
- * asserts that top(n) highlights is a subset of top(n+1) up to some max N
- */
- // TODO: this only tests single-valued fields. we should also index multiple values per field!
- public void testRanking() throws Exception {
- // number of documents: we will check each one
- final int numDocs = atLeast(100);
- // number of top-N snippets, we will check 1 .. N
- final int maxTopN = 5;
- // maximum number of elements to put in a sentence.
- final int maxSentenceLength = 10;
- // maximum number of sentences in a document
- final int maxNumSentences = 20;
-
- Directory dir = newDirectory();
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- Document document = new Document();
- Field id = new StringField("id", "", Field.Store.NO);
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- document.add(id);
- document.add(body);
-
- for (int i = 0; i < numDocs; i++) {
- StringBuilder bodyText = new StringBuilder();
- int numSentences = TestUtil.nextInt(random(), 1, maxNumSentences);
- for (int j = 0; j < numSentences; j++) {
- bodyText.append(newSentence(random(), maxSentenceLength));
- }
- body.setStringValue(bodyText.toString());
- id.setStringValue(Integer.toString(i));
- iw.addDocument(document);
- }
-
- IndexReader ir = iw.getReader();
- IndexSearcher searcher = newSearcher(ir);
- for (int i = 0; i < numDocs; i++) {
- checkDocument(searcher, i, maxTopN);
- }
- iw.close();
- ir.close();
- dir.close();
- }
-
- private void checkDocument(IndexSearcher is, int doc, int maxTopN) throws IOException {
- for (int ch = 'a'; ch <= 'z'; ch++) {
- Term term = new Term("body", "" + (char)ch);
- // check a simple term query
- checkQuery(is, new TermQuery(term), doc, maxTopN);
- // check a boolean query
- BooleanQuery.Builder bq = new BooleanQuery.Builder();
- bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
- Term nextTerm = new Term("body", "" + (char)(ch+1));
- bq.add(new TermQuery(nextTerm), BooleanClause.Occur.SHOULD);
- checkQuery(is, bq.build(), doc, maxTopN);
- }
- }
-
- private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
- for (int n = 1; n < maxTopN; n++) {
- final FakePassageFormatter f1 = new FakePassageFormatter();
- PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
- @Override
- protected PassageFormatter getFormatter(String field) {
- assertEquals("body", field);
- return f1;
- }
- };
-
- final FakePassageFormatter f2 = new FakePassageFormatter();
- PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
- @Override
- protected PassageFormatter getFormatter(String field) {
- assertEquals("body", field);
- return f2;
- }
- };
-
- BooleanQuery.Builder bq = new BooleanQuery.Builder();
- bq.add(query, BooleanClause.Occur.MUST);
- bq.add(new TermQuery(new Term("id", Integer.toString(doc))), BooleanClause.Occur.MUST);
- TopDocs td = is.search(bq.build(), 1);
- p1.highlight("body", bq.build(), is, td, n);
- p2.highlight("body", bq.build(), is, td, n+1);
- assertTrue(f2.seen.containsAll(f1.seen));
- }
- }
-
- /**
- * returns a new random sentence, up to maxSentenceLength "words" in length.
- * each word is a single character (a-z). The first one is capitalized.
- */
- private String newSentence(Random r, int maxSentenceLength) {
- StringBuilder sb = new StringBuilder();
- int numElements = TestUtil.nextInt(r, 1, maxSentenceLength);
- for (int i = 0; i < numElements; i++) {
- if (sb.length() > 0) {
- sb.append(' ');
- sb.append((char) TestUtil.nextInt(r, 'a', 'z'));
- } else {
- // capitalize the first word to help breakiterator
- sb.append((char) TestUtil.nextInt(r, 'A', 'Z'));
- }
- }
- sb.append(". "); // finalize sentence
- return sb.toString();
- }
-
- /**
- * a fake formatter that doesn't actually format passages.
- * instead it just collects them for asserts!
- */
- static class FakePassageFormatter extends PassageFormatter {
- HashSet<Pair> seen = new HashSet<>();
-
- @Override
- public String format(Passage passages[], String content) {
- for (Passage p : passages) {
- // verify some basics about the passage
- assertTrue(p.getScore() >= 0);
- assertTrue(p.getNumMatches() > 0);
- assertTrue(p.getStartOffset() >= 0);
- assertTrue(p.getStartOffset() <= content.length());
- assertTrue(p.getEndOffset() >= p.getStartOffset());
- assertTrue(p.getEndOffset() <= content.length());
- // we use a very simple analyzer. so we can assert the matches are correct
- int lastMatchStart = -1;
- for (int i = 0; i < p.getNumMatches(); i++) {
- BytesRef term = p.getMatchTerms()[i];
- int matchStart = p.getMatchStarts()[i];
- assertTrue(matchStart >= 0);
- // must at least start within the passage
- assertTrue(matchStart < p.getEndOffset());
- int matchEnd = p.getMatchEnds()[i];
- assertTrue(matchEnd >= 0);
- // always moving forward
- assertTrue(matchStart >= lastMatchStart);
- lastMatchStart = matchStart;
- // single character terms
- assertEquals(matchStart+1, matchEnd);
- // and the offsets must be correct...
- assertEquals(1, term.length);
- assertEquals((char)term.bytes[term.offset], Character.toLowerCase(content.charAt(matchStart)));
- }
- // record just the start/end offset for simplicity
- seen.add(new Pair(p.getStartOffset(), p.getEndOffset()));
- }
- return "bogus!!!!!!";
- }
- }
-
- static class Pair {
- final int start;
- final int end;
-
- Pair(int start, int end) {
- this.start = start;
- this.end = end;
- }
-
- @Override
- public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + end;
- result = prime * result + start;
- return result;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj) {
- return true;
- }
- if (obj == null) {
- return false;
- }
- if (getClass() != obj.getClass()) {
- return false;
- }
- Pair other = (Pair) obj;
- if (end != other.end) {
- return false;
- }
- if (start != other.start) {
- return false;
- }
- return true;
- }
-
- @Override
- public String toString() {
- return "Pair [start=" + start + ", end=" + end + "]";
- }
- }
-
- /** sets b=0 to disable passage length normalization */
- public void testCustomB() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This is a test. This test is a better test but the sentence is excruiatingly long, " +
- "you have no idea how painful it was for me to type this long sentence into my IDE.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
- @Override
- protected PassageScorer getScorer(String field) {
- return new PassageScorer(1.2f, 0, 87);
- }
- };
- Query query = new TermQuery(new Term("body", "test"));
- TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 1);
- assertEquals(1, snippets.length);
- assertTrue(snippets[0].startsWith("This <b>test</b> is a better <b>test</b>"));
-
- ir.close();
- dir.close();
- }
-
- /** sets k1=0 for simple coordinate-level match (# of query terms present) */
- public void testCustomK1() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
- iwc.setMergePolicy(newLogMergePolicy());
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
-
- FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
- offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Field body = new Field("body", "", offsetsType);
- Document doc = new Document();
- doc.add(body);
-
- body.setStringValue("This has only foo foo. " +
- "On the other hand this sentence contains both foo and bar. " +
- "This has only bar bar bar bar bar bar bar bar bar bar bar bar.");
- iw.addDocument(doc);
-
- IndexReader ir = iw.getReader();
- iw.close();
-
- IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
- @Override
- protected PassageScorer getScorer(String field) {
- return new PassageScorer(0, 0.75f, 87);
- }
- };
- BooleanQuery.Builder query = new BooleanQuery.Builder();
- query.add(new TermQuery(new Term("body", "foo")), BooleanClause.Occur.SHOULD);
- query.add(new TermQuery(new Term("body", "bar")), BooleanClause.Occur.SHOULD);
- TopDocs topDocs = searcher.search(query.build(), 10, Sort.INDEXORDER);
- assertEquals(1, topDocs.totalHits);
- String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs, 1);
- assertEquals(1, snippets.length);
- assertTrue(snippets[0].startsWith("On the other hand"));
-
- ir.close();
- dir.close();
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/LengthGoalBreakIteratorTest.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/LengthGoalBreakIteratorTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/LengthGoalBreakIteratorTest.java
index 4dd30e2..2434f52 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/LengthGoalBreakIteratorTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/LengthGoalBreakIteratorTest.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.QueryBuilder;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestCustomSeparatorBreakIterator.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestCustomSeparatorBreakIterator.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestCustomSeparatorBreakIterator.java
new file mode 100644
index 0000000..cacb9cb
--- /dev/null
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestCustomSeparatorBreakIterator.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.uhighlight;
+
+import java.text.BreakIterator;
+import java.util.Locale;
+
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+import org.apache.lucene.util.LuceneTestCase;
+
+import static org.apache.lucene.search.uhighlight.TestWholeBreakIterator.assertSameBreaks;
+import static org.hamcrest.CoreMatchers.equalTo;
+
+public class TestCustomSeparatorBreakIterator extends LuceneTestCase {
+ /** Candidate separators: space, NUL and 8233 (U+2029, PARAGRAPH SEPARATOR); randomSeparator() draws one per call. */
+ private static final Character[] SEPARATORS = new Character[]{' ', '\u0000', 8233};
+ /** Expects a break right after each separator, checked walking forwards, backwards, and via following()/preceding()/next(int). */
+ public void testBreakOnCustomSeparator() throws Exception {
+ Character separator = randomSeparator();
+ BreakIterator bi = new CustomSeparatorBreakIterator(separator);
+ String source = "this" + separator + "is" + separator + "the" + separator + "first" + separator + "sentence";
+ bi.setText(source);
+ assertThat(bi.current(), equalTo(0));
+ assertThat(bi.first(), equalTo(0));
+ assertThat(source.substring(bi.current(), bi.next()), equalTo("this" + separator)); // arguments evaluate left to right: current() is read before next() advances
+ assertThat(source.substring(bi.current(), bi.next()), equalTo("is" + separator));
+ assertThat(source.substring(bi.current(), bi.next()), equalTo("the" + separator));
+ assertThat(source.substring(bi.current(), bi.next()), equalTo("first" + separator));
+ assertThat(source.substring(bi.current(), bi.next()), equalTo("sentence")); // the last piece has no trailing separator
+ assertThat(bi.next(), equalTo(BreakIterator.DONE));
+ // Backward walk: current() is captured before previous() moves the iterator, so the two offsets delimit one piece.
+ assertThat(bi.last(), equalTo(source.length()));
+ int current = bi.current();
+ assertThat(source.substring(bi.previous(), current), equalTo("sentence"));
+ current = bi.current();
+ assertThat(source.substring(bi.previous(), current), equalTo("first" + separator));
+ current = bi.current();
+ assertThat(source.substring(bi.previous(), current), equalTo("the" + separator));
+ current = bi.current();
+ assertThat(source.substring(bi.previous(), current), equalTo("is" + separator));
+ current = bi.current();
+ assertThat(source.substring(bi.previous(), current), equalTo("this" + separator));
+ assertThat(bi.previous(), equalTo(BreakIterator.DONE));
+ assertThat(bi.current(), equalTo(0));
+ // Offset 9 falls inside "the" (indices 8..10): following(9) is expected to land just past the separator that ends it.
+ assertThat(source.substring(0, bi.following(9)), equalTo("this" + separator + "is" + separator + "the" + separator));
+ // preceding(9) is expected to land on the index where "the" begins.
+ assertThat(source.substring(0, bi.preceding(9)), equalTo("this" + separator + "is" + separator));
+ // After first(), next(3) is expected to advance three boundaries in one call.
+ assertThat(bi.first(), equalTo(0));
+ assertThat(source.substring(0, bi.next(3)), equalTo("this" + separator + "is" + separator + "the" + separator));
+ }
+ /** None of these texts contains a candidate separator, so the breaks are expected to match the sentence iterator's. */
+ public void testSingleSentences() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
+ assertSameBreaks("a", expected, actual);
+ assertSameBreaks("ab", expected, actual);
+ assertSameBreaks("abc", expected, actual);
+ assertSameBreaks("", expected, actual);
+ }
+ /** Slices that begin at the start of the text and stop before its end (arguments are text, offset, length). */
+ public void testSliceEnd() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
+ assertSameBreaks("a000", 0, 1, expected, actual);
+ assertSameBreaks("ab000", 0, 1, expected, actual); // NOTE(review): length stays 1 here and on the next line, so the slice is just "a"; testSliceStart varies it 1/2/3 - confirm intended
+ assertSameBreaks("abc000", 0, 1, expected, actual);
+ assertSameBreaks("000", 0, 0, expected, actual);
+ }
+ /** Slices that begin at offset 3 and run to the end of the text. */
+ public void testSliceStart() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
+ assertSameBreaks("000a", 3, 1, expected, actual);
+ assertSameBreaks("000ab", 3, 2, expected, actual);
+ assertSameBreaks("000abc", 3, 3, expected, actual);
+ assertSameBreaks("000", 3, 0, expected, actual);
+ }
+ /** Slices that begin at offset 3 and leave characters outside the slice on both sides. */
+ public void testSliceMiddle() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
+ assertSameBreaks("000a000", 3, 1, expected, actual);
+ assertSameBreaks("000ab000", 3, 2, expected, actual);
+ assertSameBreaks("000abc000", 3, 3, expected, actual);
+ assertSameBreaks("000000", 3, 0, expected, actual);
+ }
+
+ /** The incoming position (index 4 here, while the slice begins at 3) must be ignored; the initial position is always first(). */
+ public void testFirstPosition() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
+ assertSameBreaks("000ab000", 3, 2, 4, expected, actual);
+ }
+ /** Draws one entry of SEPARATORS with random() as the randomness source; the Character is unboxed to char on return. */
+ private static char randomSeparator() {
+ return RandomPicks.randomFrom(random(), SEPARATORS);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index ddf8a92..3ffe95a 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -49,7 +49,6 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestWholeBreakIterator.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestWholeBreakIterator.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestWholeBreakIterator.java
new file mode 100644
index 0000000..8e5b2ff
--- /dev/null
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestWholeBreakIterator.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.uhighlight;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.Locale;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestWholeBreakIterator extends LuceneTestCase {
+ // Compares WholeBreakIterator with the JDK sentence iterator (Locale.ROOT); the public static assertSameBreaks helpers are also statically imported by TestCustomSeparatorBreakIterator.
+ /** For single sentences, we know WholeBreakIterator should break the same as a sentence iterator */
+ public void testSingleSentences() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new WholeBreakIterator();
+ assertSameBreaks("a", expected, actual);
+ assertSameBreaks("ab", expected, actual);
+ assertSameBreaks("abc", expected, actual);
+ assertSameBreaks("", expected, actual);
+ }
+ /** Slices that begin at the start of the text and stop before its end (arguments are text, offset, length). */
+ public void testSliceEnd() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new WholeBreakIterator();
+ assertSameBreaks("a000", 0, 1, expected, actual);
+ assertSameBreaks("ab000", 0, 1, expected, actual); // NOTE(review): length stays 1 here and on the next line, so the slice is just "a"; testSliceStart varies it 1/2/3 - confirm intended
+ assertSameBreaks("abc000", 0, 1, expected, actual);
+ assertSameBreaks("000", 0, 0, expected, actual);
+ }
+ /** Slices that begin at offset 3 and run to the end of the text. */
+ public void testSliceStart() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new WholeBreakIterator();
+ assertSameBreaks("000a", 3, 1, expected, actual);
+ assertSameBreaks("000ab", 3, 2, expected, actual);
+ assertSameBreaks("000abc", 3, 3, expected, actual);
+ assertSameBreaks("000", 3, 0, expected, actual);
+ }
+ /** Slices that begin at offset 3 and leave characters outside the slice on both sides. */
+ public void testSliceMiddle() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new WholeBreakIterator();
+ assertSameBreaks("000a000", 3, 1, expected, actual);
+ assertSameBreaks("000ab000", 3, 2, expected, actual);
+ assertSameBreaks("000abc000", 3, 3, expected, actual);
+ assertSameBreaks("000000", 3, 0, expected, actual);
+ }
+
+ /** the current position must be ignored, initial position is always first() */
+ public void testFirstPosition() throws Exception {
+ BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
+ BreakIterator actual = new WholeBreakIterator();
+ assertSameBreaks("000ab000", 3, 2, 4, expected, actual); // slice is [3, 5); the character iterator is handed over positioned at index 4
+ }
+ /** Compares the two iterators over the whole of {@code text}; each one receives its own StringCharacterIterator. */
+ public static void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
+ assertSameBreaks(new StringCharacterIterator(text),
+ new StringCharacterIterator(text),
+ expected,
+ actual);
+ }
+ /** Compares over the slice [offset, offset+length) with the character iterator positioned at {@code offset}. */
+ public static void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) {
+ assertSameBreaks(text, offset, length, offset, expected, actual);
+ }
+ /** Compares over the slice [offset, offset+length) with the character iterator positioned at {@code current}. */
+ public static void assertSameBreaks(String text, int offset, int length, int current, BreakIterator expected, BreakIterator actual) {
+ assertSameBreaks(new StringCharacterIterator(text, offset, offset+length, current),
+ new StringCharacterIterator(text, offset, offset+length, current),
+ expected,
+ actual);
+ }
+
+ /** Asserts that two breakiterators break the text the same way: every navigation call must return the same value and leave the same current(). */
+ public static void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) {
+ expected.setText(one);
+ actual.setText(two);
+ // Right after setText() both iterators must already report the same position.
+ assertEquals(expected.current(), actual.current());
+
+ // next(): step both iterators forward in lockstep until expected reports DONE
+ int v = expected.current();
+ while (v != BreakIterator.DONE) {
+ assertEquals(v = expected.next(), actual.next());
+ assertEquals(expected.current(), actual.current());
+ }
+
+ // first(): both must return the same boundary and agree on current() afterwards
+ assertEquals(expected.first(), actual.first());
+ assertEquals(expected.current(), actual.current());
+ // last(): same check at the other end
+ assertEquals(expected.last(), actual.last());
+ assertEquals(expected.current(), actual.current());
+
+ // previous(): starting from last(), step both backwards in lockstep until expected reports DONE
+ v = expected.current();
+ while (v != BreakIterator.DONE) {
+ assertEquals(v = expected.previous(), actual.previous());
+ assertEquals(expected.current(), actual.current());
+ }
+
+ // following(): probe every offset from one's begin index through its end index inclusive, resetting both with first() before each probe
+ for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
+ expected.first();
+ actual.first();
+ assertEquals(expected.following(i), actual.following(i));
+ assertEquals(expected.current(), actual.current());
+ }
+
+ // preceding(): same sweep over one's range, resetting both with last() before each probe
+ for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
+ expected.last();
+ actual.last();
+ assertEquals(expected.preceding(i), actual.preceding(i));
+ assertEquals(expected.current(), actual.current());
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0d3c73ea/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
index e9c842c..bd32fc1 100644
--- a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
@@ -28,13 +28,13 @@ import java.util.function.Predicate;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator;
-import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
+import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
import org.apache.lucene.search.uhighlight.LengthGoalBreakIterator;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.PassageScorer;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
+import org.apache.lucene.search.uhighlight.WholeBreakIterator;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;