You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2013/02/05 14:59:26 UTC
svn commit: r1442590 - in /lucene/dev/trunk:
dev-tools/maven/lucene/highlighter/ lucene/ lucene/highlighter/
lucene/highlighter/src/java/org/apache/lucene/search/highlight/
lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/
lucene/hi...
Author: simonw
Date: Tue Feb 5 13:59:26 2013
New Revision: 1442590
URL: http://svn.apache.org/viewvc?rev=1442590&view=rev
Log:
LUCENE-4728: Unknown and not explicitly mapped queries are now rewritten against the highlighting IndexReader to obtain primitive queries before discarding the query entirely.
Modified:
lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/highlighter/build.xml
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Modified: lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template (original)
+++ lucene/dev/trunk/dev-tools/maven/lucene/highlighter/pom.xml.template Tue Feb 5 13:59:26 2013
@@ -61,6 +61,11 @@
<artifactId>lucene-memory</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>lucene-queries</artifactId>
+ <version>${project.version}</version>
+ </dependency>
</dependencies>
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Feb 5 13:59:26 2013
@@ -94,6 +94,12 @@ New Features
* LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer
creation via the resulting factories using NewAnalyzerTask. (Steve Rowe)
+* LUCENE-4728: Unknown and not explicitly mapped queries are now rewritten
+ against the highlighting IndexReader to obtain primitive queries before
+ discarding the query entirely. WeightedSpanTermExtractor now builds a
+ MemoryIndex only once even if multiple fields are highlighted.
+ (Simon Willnauer)
+
API Changes
* LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)
Modified: lucene/dev/trunk/lucene/highlighter/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/build.xml?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/build.xml (original)
+++ lucene/dev/trunk/lucene/highlighter/build.xml Tue Feb 5 13:59:26 2013
@@ -27,6 +27,7 @@
<path id="classpath">
<pathelement path="${memory.jar}"/>
+ <pathelement path="${queries.jar}"/>
<path refid="base.classpath"/>
</path>
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java Tue Feb 5 13:59:26 2013
@@ -187,11 +187,9 @@ public class Highlighter
ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
StringBuilder newText=new StringBuilder();
- CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
- OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
- tokenStream.addAttribute(PositionIncrementAttribute.class);
- tokenStream.reset();
-
+ CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
+ OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
+ tokenStream.reset();
TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
if (fragmentScorer instanceof QueryScorer) {
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Tue Feb 5 13:59:26 2013
@@ -18,7 +18,7 @@ package org.apache.lucene.search.highlig
*/
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -29,11 +29,18 @@ import java.util.TreeSet;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanFirstQuery;
@@ -44,6 +51,8 @@ import org.apache.lucene.search.spans.Sp
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
+
/**
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
@@ -53,12 +62,13 @@ public class WeightedSpanTermExtractor {
private String fieldName;
private TokenStream tokenStream;
- private Map<String,AtomicReaderContext> readers = new HashMap<String,AtomicReaderContext>(10);
private String defaultField;
private boolean expandMultiTermQuery;
private boolean cachedTokenStream;
private boolean wrapToCaching = true;
private int maxDocCharsToAnalyze;
+ private AtomicReader reader = null;
+
public WeightedSpanTermExtractor() {
}
@@ -69,18 +79,6 @@ public class WeightedSpanTermExtractor {
}
}
- private void closeReaders() {
- Collection<AtomicReaderContext> ctxSet = readers.values();
-
- for (final AtomicReaderContext ctx : ctxSet) {
- try {
- ctx.reader().close();
- } catch (IOException e) {
- // alert?
- }
- }
- }
-
/**
* Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
*
@@ -146,21 +144,14 @@ public class WeightedSpanTermExtractor {
if (q != null) {
extract(q, terms);
}
+ } else if (query instanceof CommonTermsQuery) {
+ // specialized since rewriting would change the result query
+ // this query is TermContext sensitive.
+ extractWeightedTerms(terms, query);
} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract(iterator.next(), terms);
}
- } else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
- MultiTermQuery mtq = ((MultiTermQuery)query);
- if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
- mtq = (MultiTermQuery) mtq.clone();
- mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
- query = mtq;
- }
- if (mtq.getField() != null) {
- IndexReader ir = getLeafContextForField(mtq.getField()).reader();
- extract(query.rewrite(ir), terms);
- }
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final List<Term[]> termArrays = mpq.getTermArrays();
@@ -210,12 +201,30 @@ public class WeightedSpanTermExtractor {
sp.setBoost(query.getBoost());
extractWeightedSpanTerms(terms, sp);
}
+ } else {
+ Query origQuery = query;
+ if (query instanceof MultiTermQuery) {
+ if (!expandMultiTermQuery) {
+ return;
+ }
+ MultiTermQuery copy = (MultiTermQuery) query.clone();
+ copy.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ origQuery = copy;
+ }
+ final IndexReader reader = getLeafContext().reader();
+ Query rewritten = origQuery.rewrite(reader);
+ if (rewritten != origQuery) {
+ // only rewrite once and then flatten again - the rewritten query could have a special treatment
+ // if this method is overridden in a subclass or above in the next recursion
+ extract(rewritten, terms);
+ }
}
extractUnknownQuery(query, terms);
}
protected void extractUnknownQuery(Query query,
Map<String, WeightedSpanTerm> terms) throws IOException {
+
// for sub-classing to extract custom queries
}
@@ -249,7 +258,7 @@ public class WeightedSpanTermExtractor {
final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
if (mustRewriteQuery) {
for (final String field : fieldNames) {
- final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContextForField(field).reader());
+ final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader());
queries.put(field, rewrittenQuery);
rewrittenQuery.extractTerms(nonWeightedTerms);
}
@@ -266,7 +275,7 @@ public class WeightedSpanTermExtractor {
} else {
q = spanQuery;
}
- AtomicReaderContext context = getLeafContextForField(field);
+ AtomicReaderContext context = getLeafContext();
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
TreeSet<Term> extractedTerms = new TreeSet<Term>();
q.extractTerms(extractedTerms);
@@ -338,23 +347,69 @@ public class WeightedSpanTermExtractor {
return rv;
}
- protected AtomicReaderContext getLeafContextForField(String field) throws IOException {
- if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
- tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
- cachedTokenStream = true;
- }
- AtomicReaderContext context = readers.get(field);
- if (context == null) {
- MemoryIndex indexer = new MemoryIndex();
- indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+ protected AtomicReaderContext getLeafContext() throws IOException {
+ if (reader == null) {
+ if(wrapToCaching && !(tokenStream instanceof CachingTokenFilter)) {
+ assert !cachedTokenStream;
+ tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+ cachedTokenStream = true;
+ }
+ final MemoryIndex indexer = new MemoryIndex(true);
+ indexer.addField(DelegatingAtomicReader.FIELD_NAME, tokenStream);
tokenStream.reset();
- IndexSearcher searcher = indexer.createSearcher();
+ final IndexSearcher searcher = indexer.createSearcher();
// MEM index has only atomic ctx
- context = (AtomicReaderContext) searcher.getTopReaderContext();
- readers.put(field, context);
+ reader = new DelegatingAtomicReader(((AtomicReaderContext)searcher.getTopReaderContext()).reader());
}
+ return reader.getContext();
+ }
+
+ /*
+ * This reader will just delegate every call to a single field in the wrapped
+ * AtomicReader. This way we only need to build this field once rather than
+ * N-Times
+ */
+ static final class DelegatingAtomicReader extends FilterAtomicReader {
+ private static final String FIELD_NAME = "shadowed_field";
- return context;
+ DelegatingAtomicReader(AtomicReader in) {
+ super(in);
+ }
+
+ @Override
+ public FieldInfos getFieldInfos() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Fields fields() throws IOException {
+ return new FilterFields(super.fields()) {
+ @Override
+ public Terms terms(String field) throws IOException {
+ return super.terms(DelegatingAtomicReader.FIELD_NAME);
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
+ }
+
+ @Override
+ public int size() {
+ return 1;
+ }
+ };
+ }
+
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ return super.docValues(FIELD_NAME);
+ }
+
+ @Override
+ public DocValues normValues(String field) throws IOException {
+ return super.normValues(FIELD_NAME);
+ }
}
/**
@@ -401,7 +456,7 @@ public class WeightedSpanTermExtractor {
try {
extract(query, terms);
} finally {
- closeReaders();
+ IOUtils.close(reader);
}
return terms;
@@ -449,8 +504,7 @@ public class WeightedSpanTermExtractor {
weightedSpanTerm.weight *= idf;
}
} finally {
-
- closeReaders();
+ IOUtils.close(reader);
}
return terms;
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Tue Feb 5 13:59:26 2013
@@ -28,9 +28,12 @@ import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
@@ -92,8 +95,7 @@ public class FieldQuery {
if( !clause.isProhibited() )
flatten( clause.getQuery(), reader, flatQueries );
}
- }
- else if( sourceQuery instanceof DisjunctionMaxQuery ){
+ } else if( sourceQuery instanceof DisjunctionMaxQuery ){
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
for( Query query : dmq ){
flatten( query, reader, flatQueries );
@@ -103,12 +105,6 @@ public class FieldQuery {
if( !flatQueries.contains( sourceQuery ) )
flatQueries.add( sourceQuery );
}
- else if (sourceQuery instanceof MultiTermQuery && reader != null) {
- MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
- copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
- BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader);
- flatten(mtqTerms, reader, flatQueries);
- }
else if( sourceQuery instanceof PhraseQuery ){
if( !flatQueries.contains( sourceQuery ) ){
PhraseQuery pq = (PhraseQuery)sourceQuery;
@@ -118,6 +114,31 @@ public class FieldQuery {
flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
}
}
+ } else if (sourceQuery instanceof ConstantScoreQuery) {
+ final Query q = ((ConstantScoreQuery) sourceQuery).getQuery();
+ if (q != null) {
+ flatten(q, reader, flatQueries);
+ }
+ } else if (sourceQuery instanceof FilteredQuery) {
+ final Query q = ((FilteredQuery) sourceQuery).getQuery();
+ if (q != null) {
+ flatten(q, reader, flatQueries);
+ }
+ } else if (reader != null){
+ Query query = sourceQuery;
+ if (sourceQuery instanceof MultiTermQuery) {
+ MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
+ copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
+ query = copy;
+ }
+ Query rewritten = query.rewrite(reader);
+ if (rewritten != query) {
+ // only rewrite once and then flatten again - the rewritten query could have a special treatment
+ // if this method is overridden in a subclass.
+ flatten(rewritten, reader, flatQueries);
+
+ }
+ // if the query is already rewritten we discard it
}
// else discard queries
}
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Tue Feb 5 13:59:26 2013
@@ -46,6 +46,7 @@ import org.apache.lucene.index.StoredDoc
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
@@ -114,6 +115,87 @@ public class HighlighterTest extends Bas
}
}
+ public void testHighlightingCommonTermsQuery() throws Exception {
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+ CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
+ query.add(new Term(FIELD_NAME, "this"));
+ query.add(new Term(FIELD_NAME, "long"));
+ query.add(new Term(FIELD_NAME, "very"));
+
+ searcher = new IndexSearcher(reader);
+ TopDocs hits = searcher.search(query, 10);
+ assertEquals(2, hits.totalHits);
+ QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+ Highlighter highlighter = new Highlighter(scorer);
+
+ StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc);
+ String storedField = doc.get(FIELD_NAME);
+
+ TokenStream stream = TokenSources.getAnyTokenStream(searcher
+ .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer);
+ Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
+ highlighter.setTextFragmenter(fragmenter);
+ String fragment = highlighter.getBestFragment(stream, storedField);
+ assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
+
+ doc = searcher.doc(hits.scoreDocs[1].doc);
+ storedField = doc.get(FIELD_NAME);
+
+ stream = TokenSources.getAnyTokenStream(searcher
+ .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
+ highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
+ fragment = highlighter.getBestFragment(stream, storedField);
+ assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
+ }
+
+ public void testHighlightUnknowQueryAfterRewrite() throws IOException, InvalidTokenOffsetsException {
+ Query query = new Query() {
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
+ query.add(new Term(FIELD_NAME, "this"));
+ query.add(new Term(FIELD_NAME, "long"));
+ query.add(new Term(FIELD_NAME, "very"));
+ return query;
+ }
+
+ @Override
+ public String toString(String field) {
+ return null;
+ }
+
+ };
+
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+
+ searcher = new IndexSearcher(reader);
+ TopDocs hits = searcher.search(query, 10);
+ assertEquals(2, hits.totalHits);
+ QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+ Highlighter highlighter = new Highlighter(scorer);
+
+ StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc);
+ String storedField = doc.get(FIELD_NAME);
+
+ TokenStream stream = TokenSources.getAnyTokenStream(searcher
+ .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer);
+ Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
+ highlighter.setTextFragmenter(fragmenter);
+ String fragment = highlighter.getBestFragment(stream, storedField);
+ assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
+
+ doc = searcher.doc(hits.scoreDocs[1].doc);
+ storedField = doc.get(FIELD_NAME);
+
+ stream = TokenSources.getAnyTokenStream(searcher
+ .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
+ highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
+ fragment = highlighter.getBestFragment(stream, storedField);
+ assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
+
+ }
+
public void testHighlightingWithDefaultField() throws Exception {
String s1 = "I call our world Flatland, not because we call it so,";
@@ -150,7 +232,7 @@ public class HighlighterTest extends Bas
"Query in a named field does not result in highlighting when that field isn't in the query",
s1, highlightField(q, FIELD_NAME, s1));
}
-
+
/**
* This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
*/
@@ -603,7 +685,7 @@ public class HighlighterTest extends Bas
// Not sure we can assert anything here - just running to check we dont
// throw any exceptions
}
-
+
public void testSpanHighlighting() throws Exception {
Query query1 = new SpanNearQuery(new SpanQuery[] {
new SpanTermQuery(new Term(FIELD_NAME, "wordx")),
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Tue Feb 5 13:59:26 2013
@@ -18,6 +18,8 @@ package org.apache.lucene.search.vectorh
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -26,7 +28,13 @@ import org.apache.lucene.index.Directory
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -62,4 +70,47 @@ public class FastVectorHighlighterTest e
writer.close();
dir.close();
}
+
+ public void testCommonTermsQueryHighlightTest() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
+ FieldType type = new FieldType(TextField.TYPE_STORED);
+ type.setStoreTermVectorOffsets(true);
+ type.setStoreTermVectorPositions(true);
+ type.setStoreTermVectors(true);
+ type.freeze();
+ String[] texts = {
+ "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
+ "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
+ "JFK has been shot", "John Kennedy has been shot",
+ "This text has a typo in referring to Keneddy",
+ "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
+ for (int i = 0; i < texts.length; i++) {
+ Document doc = new Document();
+ Field field = new Field("field", texts[i], type);
+ doc.add(field);
+ writer.addDocument(doc);
+ }
+ CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
+ query.add(new Term("field", "text"));
+ query.add(new Term("field", "long"));
+ query.add(new Term("field", "very"));
+
+ FastVectorHighlighter highlighter = new FastVectorHighlighter();
+ IndexReader reader = DirectoryReader.open(writer, true);
+ IndexSearcher searcher = new IndexSearcher(reader);
+ TopDocs hits = searcher.search(query, 10);
+ assertEquals(2, hits.totalHits);
+ FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+ String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
+ assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b> in the middle and finally ends with another reference to Kennedy", bestFragments[0]);
+
+ fieldQuery = highlighter.getFieldQuery(query, reader);
+ bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[1].doc, "field", 1000, 1);
+ assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b> <b>long</b> and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]);
+
+ reader.close();
+ writer.close();
+ dir.close();
+ }
}
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Tue Feb 5 13:59:26 2013
@@ -23,8 +23,13 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
@@ -35,6 +40,7 @@ import org.apache.lucene.search.TermRang
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
public class FieldQueryTest extends AbstractTestCase {
@@ -905,4 +911,40 @@ public class FieldQueryTest extends Abst
assertNotNull (fq.searchPhrase(F, phraseCandidate));
}
+ public void testStopRewrite() throws Exception {
+ Query q = new Query() {
+
+ @Override
+ public String toString(String field) {
+ return "DummyQuery";
+ }
+
+ };
+ make1d1fIndex( "a" );
+ assertNotNull(reader);
+ new FieldQuery(q, reader, true, true );
+ }
+
+ public void testFlattenFilteredQuery() throws Exception {
+ Query query = new FilteredQuery(pqF( "A" ), new Filter() {
+ @Override
+ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
+ throws IOException {
+ return null;
+ }
+ });
+ FieldQuery fq = new FieldQuery( query, true, true );
+ Set<Query> flatQueries = new HashSet<Query>();
+ fq.flatten( query, reader, flatQueries );
+ assertCollectionQueries( flatQueries, tq( "A" ) );
+ }
+
+ public void testFlattenConstantScoreQuery() throws Exception {
+ Query query = new ConstantScoreQuery(pqF( "A" ));
+ FieldQuery fq = new FieldQuery( query, true, true );
+ Set<Query> flatQueries = new HashSet<Query>();
+ fq.flatten( query, reader, flatQueries );
+ assertCollectionQueries( flatQueries, tq( "A" ) );
+ }
+
}
Modified: lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1442590&r1=1442589&r2=1442590&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Tue Feb 5 13:59:26 2013
@@ -465,7 +465,9 @@ public class MemoryIndex {
throw new RuntimeException(e);
} finally {
try {
- if (stream != null) stream.close();
+ if (stream != null) {
+ stream.close();
+ }
} catch (IOException e2) {
throw new RuntimeException(e2);
}