You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/01/08 11:23:02 UTC
svn commit: r1556485 - in /lucene/dev/branches/lucene_solr_4_6: ./ lucene/
lucene/highlighter/
lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/
lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/
Author: jpountz
Date: Wed Jan 8 10:23:02 2014
New Revision: 1556485
URL: http://svn.apache.org/r1556485
Log:
LUCENE-5361: Fixed handling of query boosts in FastVectorHighlighter.
Modified:
lucene/dev/branches/lucene_solr_4_6/ (props changed)
lucene/dev/branches/lucene_solr_4_6/lucene/ (props changed)
lucene/dev/branches/lucene_solr_4_6/lucene/CHANGES.txt
lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/ (props changed)
lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
Modified: lucene/dev/branches/lucene_solr_4_6/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_6/lucene/CHANGES.txt?rev=1556485&r1=1556484&r2=1556485&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_6/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_6/lucene/CHANGES.txt Wed Jan 8 10:23:02 2014
@@ -11,6 +11,9 @@ Bug fixes
[Lucene40/Lucene42/Memory/Direct]DocValuesFormat was over-estimated.
(Shay Banon, Adrien Grand, Robert Muir)
+* LUCENE-5361: Fixed handling of query boosts in FastVectorHighlighter.
+ (Nik Everett via Adrien Grand)
+
======================= Lucene 4.6.0 =======================
New Features
Modified: lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1556485&r1=1556484&r2=1556485&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (original)
+++ lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Wed Jan 8 10:23:02 2014
@@ -91,14 +91,15 @@ public class FieldQuery {
void flatten( Query sourceQuery, IndexReader reader, Collection<Query> flatQueries ) throws IOException{
if( sourceQuery instanceof BooleanQuery ){
BooleanQuery bq = (BooleanQuery)sourceQuery;
- for( BooleanClause clause : bq.getClauses() ){
- if( !clause.isProhibited() )
- flatten( clause.getQuery(), reader, flatQueries );
+ for( BooleanClause clause : bq ) {
+ if( !clause.isProhibited() ) {
+ flatten( applyParentBoost( clause.getQuery(), bq ), reader, flatQueries );
+ }
}
} else if( sourceQuery instanceof DisjunctionMaxQuery ){
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
for( Query query : dmq ){
- flatten( query, reader, flatQueries );
+ flatten( applyParentBoost( query, dmq ), reader, flatQueries );
}
}
else if( sourceQuery instanceof TermQuery ){
@@ -111,18 +112,20 @@ public class FieldQuery {
if( pq.getTerms().length > 1 )
flatQueries.add( pq );
else if( pq.getTerms().length == 1 ){
- flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
+ Query flat = new TermQuery( pq.getTerms()[0] );
+ flat.setBoost( pq.getBoost() );
+ flatQueries.add( flat );
}
}
} else if (sourceQuery instanceof ConstantScoreQuery) {
final Query q = ((ConstantScoreQuery) sourceQuery).getQuery();
if (q != null) {
- flatten(q, reader, flatQueries);
+ flatten( applyParentBoost( q, sourceQuery ), reader, flatQueries);
}
} else if (sourceQuery instanceof FilteredQuery) {
final Query q = ((FilteredQuery) sourceQuery).getQuery();
if (q != null) {
- flatten(q, reader, flatQueries);
+ flatten( applyParentBoost( q, sourceQuery ), reader, flatQueries);
}
} else if (reader != null){
Query query = sourceQuery;
@@ -142,6 +145,18 @@ public class FieldQuery {
}
// else discard queries
}
+
+ /**
+ * Returns query unchanged when parent's boost is exactly 1; otherwise returns a clone of query whose boost is query's boost multiplied by parent's boost.
+ */
+ protected Query applyParentBoost( Query query, Query parent ) {
+ if ( parent.getBoost() == 1 ) { // nothing to push down, so no clone is made
+ return query;
+ }
+ Query cloned = query.clone(); // the combined boost is set on the clone, not on the query passed in
+ cloned.setBoost( query.getBoost() * parent.getBoost() );
+ return cloned;
+ }
/*
* Create expandQueries from flatQueries.
Modified: lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1556485&r1=1556484&r2=1556485&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Wed Jan 8 10:23:02 2014
@@ -257,8 +257,53 @@ public class FastVectorHighlighterTest e
writer.close();
dir.close();
}
-
- public void testCommonTermsQueryHighlightTest() throws IOException {
+
+ public void testBoostedPhraseHighlightTest() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer( random() ) ) );
+ Document doc = new Document();
+ FieldType type = new FieldType( TextField.TYPE_STORED );
+ type.setStoreTermVectorOffsets( true );
+ type.setStoreTermVectorPositions( true );
+ type.setStoreTermVectors( true );
+ type.freeze();
+ StringBuilder text = new StringBuilder();
+ text.append("words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk "); // the three terms scattered near the start
+ for ( int i = 0; i<10; i++ ) {
+ text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk ");
+ }
+ text.append("highlight words together "); // the exact phrase, far from the scattered terms
+ for ( int i = 0; i<10; i++ ) {
+ text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk ");
+ }
+ doc.add( new Field( "text", text.toString().trim(), type ) );
+ writer.addDocument(doc);
+ FastVectorHighlighter highlighter = new FastVectorHighlighter();
+ IndexReader reader = DirectoryReader.open(writer, true);
+
+ // This mimics what some query parsers do to <highlight words together>
+ BooleanQuery terms = new BooleanQuery();
+ terms.add( clause( "text", "highlight" ), Occur.MUST );
+ terms.add( clause( "text", "words" ), Occur.MUST );
+ terms.add( clause( "text", "together" ), Occur.MUST );
+ // This mimics what some query parsers do to <"highlight words together">
+ BooleanQuery phrase = new BooleanQuery();
+ phrase.add( clause( "text", "highlight", "words", "together" ), Occur.MUST );
+ phrase.setBoost( 100 );
+ // Combine the required loose terms with the optional boosted phrase; the phrase's boost should pull the exact-phrase fragment to the front of the list of fragments
+ BooleanQuery query = new BooleanQuery();
+ query.add( terms, Occur.MUST );
+ query.add( phrase, Occur.SHOULD );
+ FieldQuery fieldQuery = new FieldQuery( query, reader, true, false );
+ String fragment = highlighter.getBestFragment( fieldQuery, reader, 0, "text", 100 );
+ assertEquals( "junk junk junk junk junk junk junk junk <b>highlight words together</b> junk junk junk junk junk junk junk junk", fragment );
+
+ reader.close();
+ writer.close();
+ dir.close();
+ }
+
+ public void testCommonTermsQueryHighlight() throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
FieldType type = new FieldType(TextField.TYPE_STORED);
Modified: lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1556485&r1=1556484&r2=1556485&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_6/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Wed Jan 8 10:23:02 2014
@@ -44,44 +44,55 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
public class FieldQueryTest extends AbstractTestCase {
+ private float boost;
+
+ /**
+ * Sets boost to 1 whenever usually() returns true, and to random().nextFloat() * 10000 otherwise; the flatten tests below call this before building their query so they run both with a boost of 1 and with other boost values.
+ */
+ private void initBoost() {
+ boost = usually() ? 1F : random().nextFloat() * 10000;
+ }
public void testFlattenBoolean() throws Exception {
+ initBoost();
BooleanQuery booleanQuery = new BooleanQuery();
- booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
- booleanQuery.add(new TermQuery(new Term(F, "B")), Occur.MUST);
- booleanQuery.add(new TermQuery(new Term(F, "C")), Occur.SHOULD);
+ booleanQuery.setBoost( boost );
+ booleanQuery.add(tq("A"), Occur.MUST);
+ booleanQuery.add(tq("B"), Occur.MUST);
+ booleanQuery.add(tq("C"), Occur.SHOULD);
BooleanQuery innerQuery = new BooleanQuery();
- innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
- innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
+ innerQuery.add(tq("D"), Occur.MUST);
+ innerQuery.add(tq("E"), Occur.MUST);
booleanQuery.add(innerQuery, Occur.MUST_NOT);
FieldQuery fq = new FieldQuery(booleanQuery, true, true );
Set<Query> flatQueries = new HashSet<Query>();
fq.flatten(booleanQuery, reader, flatQueries);
- assertCollectionQueries( flatQueries, tq( "A" ), tq( "B" ), tq( "C" ) );
+ assertCollectionQueries( flatQueries, tq( boost, "A" ), tq( boost, "B" ), tq( boost, "C" ) );
}
public void testFlattenDisjunctionMaxQuery() throws Exception {
+ initBoost();
Query query = dmq( tq( "A" ), tq( "B" ), pqF( "C", "D" ) );
+ query.setBoost( boost );
FieldQuery fq = new FieldQuery( query, true, true );
Set<Query> flatQueries = new HashSet<Query>();
fq.flatten( query, reader, flatQueries );
- assertCollectionQueries( flatQueries, tq( "A" ), tq( "B" ), pqF( "C", "D" ) );
+ assertCollectionQueries( flatQueries, tq( boost, "A" ), tq( boost, "B" ), pqF( boost, "C", "D" ) );
}
public void testFlattenTermAndPhrase() throws Exception {
+ initBoost();
BooleanQuery booleanQuery = new BooleanQuery();
- booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
- PhraseQuery phraseQuery = new PhraseQuery();
- phraseQuery.add(new Term(F, "B"));
- phraseQuery.add(new Term(F, "C"));
- booleanQuery.add(phraseQuery, Occur.MUST);
+ booleanQuery.setBoost( boost );
+ booleanQuery.add(tq("A"), Occur.MUST);
+ booleanQuery.add(pqF("B", "C"), Occur.MUST);
FieldQuery fq = new FieldQuery(booleanQuery, true, true );
Set<Query> flatQueries = new HashSet<Query>();
fq.flatten(booleanQuery, reader, flatQueries);
- assertCollectionQueries( flatQueries, tq( "A" ), pqF( "B", "C" ) );
+ assertCollectionQueries( flatQueries, tq( boost, "A" ), pqF( boost, "B", "C" ) );
}
public void testFlattenTermAndPhrase2gram() throws Exception {
@@ -926,6 +937,7 @@ public class FieldQueryTest extends Abst
}
public void testFlattenFilteredQuery() throws Exception {
+ initBoost();
Query query = new FilteredQuery(pqF( "A" ), new Filter() {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
@@ -933,18 +945,21 @@ public class FieldQueryTest extends Abst
return null;
}
});
+ query.setBoost(boost);
FieldQuery fq = new FieldQuery( query, true, true );
Set<Query> flatQueries = new HashSet<Query>();
fq.flatten( query, reader, flatQueries );
- assertCollectionQueries( flatQueries, tq( "A" ) );
+ assertCollectionQueries( flatQueries, tq( boost, "A" ) );
}
public void testFlattenConstantScoreQuery() throws Exception {
+ initBoost();
Query query = new ConstantScoreQuery(pqF( "A" ));
+ query.setBoost(boost);
FieldQuery fq = new FieldQuery( query, true, true );
Set<Query> flatQueries = new HashSet<Query>();
fq.flatten( query, reader, flatQueries );
- assertCollectionQueries( flatQueries, tq( "A" ) );
+ assertCollectionQueries( flatQueries, tq( boost, "A" ) );
}
}