You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/01/08 11:11:27 UTC

svn commit: r1556483 - in /lucene/dev/trunk/lucene: ./ highlighter/src/java/org/apache/lucene/search/vectorhighlight/ highlighter/src/test/org/apache/lucene/search/vectorhighlight/

Author: jpountz
Date: Wed Jan  8 10:11:27 2014
New Revision: 1556483

URL: http://svn.apache.org/r1556483
Log:
LUCENE-5361: Fixed handling of query boosts in FastVectorHighlighter.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1556483&r1=1556482&r2=1556483&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Jan  8 10:11:27 2014
@@ -131,6 +131,9 @@ Bug fixes
   like when index / flush requests happen concurrently to the close or
   rollback call. (Simon Willnauer)
 
+* LUCENE-5361: Fixed handling of query boosts in FastVectorHighlighter.
+  (Nik Everett via Adrien Grand)
+
 API Changes
 
 * LUCENE-5339: The facet module was simplified/reworked to make the

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1556483&r1=1556482&r2=1556483&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Wed Jan  8 10:11:27 2014
@@ -91,14 +91,15 @@ public class FieldQuery {
   void flatten( Query sourceQuery, IndexReader reader, Collection<Query> flatQueries ) throws IOException{
     if( sourceQuery instanceof BooleanQuery ){
       BooleanQuery bq = (BooleanQuery)sourceQuery;
-      for( BooleanClause clause : bq.getClauses() ){
-        if( !clause.isProhibited() )
-          flatten( clause.getQuery(), reader, flatQueries );
+      for( BooleanClause clause : bq ) {
+        if( !clause.isProhibited() ) {
+          flatten( applyParentBoost( clause.getQuery(), bq ), reader, flatQueries );
+        }
       }
     } else if( sourceQuery instanceof DisjunctionMaxQuery ){
       DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
       for( Query query : dmq ){
-        flatten( query, reader, flatQueries );
+        flatten( applyParentBoost( query, dmq ), reader, flatQueries );
       }
     }
     else if( sourceQuery instanceof TermQuery ){
@@ -111,18 +112,20 @@ public class FieldQuery {
         if( pq.getTerms().length > 1 )
           flatQueries.add( pq );
         else if( pq.getTerms().length == 1 ){
-          flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
+          Query flat = new TermQuery( pq.getTerms()[0] );
+          flat.setBoost( pq.getBoost() );
+          flatQueries.add( flat );
         }
       }
     } else if (sourceQuery instanceof ConstantScoreQuery) {
       final Query q = ((ConstantScoreQuery) sourceQuery).getQuery();
       if (q != null) {
-        flatten(q, reader, flatQueries);
+        flatten( applyParentBoost( q, sourceQuery ), reader, flatQueries);
       }
     } else if (sourceQuery instanceof FilteredQuery) {
       final Query q = ((FilteredQuery) sourceQuery).getQuery();
       if (q != null) {
-        flatten(q, reader, flatQueries);
+        flatten( applyParentBoost( q, sourceQuery ), reader, flatQueries);
       }
     } else if (reader != null){
       Query query = sourceQuery;
@@ -142,6 +145,18 @@ public class FieldQuery {
     }
     // else discard queries
   }
+
+  /**
+   * Returns query unchanged if parent's boost is 1; otherwise returns a clone of query whose boost is multiplied by parent's boost.
+   */
+  protected Query applyParentBoost( Query query, Query parent ) {
+    if ( parent.getBoost() == 1 ) {
+      return query;
+    }
+    Query cloned = query.clone();
+    cloned.setBoost( query.getBoost() * parent.getBoost() );
+    return cloned;
+  }
   
   /*
    * Create expandQueries from flatQueries.

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1556483&r1=1556482&r2=1556483&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Wed Jan  8 10:11:27 2014
@@ -257,8 +257,53 @@ public class FastVectorHighlighterTest e
     writer.close();
     dir.close();
   }
-  
-  public void testCommonTermsQueryHighlightTest() throws IOException {
+
+  /** LUCENE-5361: checks that a boosted phrase clause wins over a fragment holding only its scattered terms. */
+  public void testBoostedPhraseHighlightTest() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer( random() ) ) );
+    Document doc = new Document();
+    FieldType type = new FieldType( TextField.TYPE_STORED  );
+    type.setStoreTermVectorOffsets( true );
+    type.setStoreTermVectorPositions( true );
+    type.setStoreTermVectors( true );
+    type.freeze();
+    StringBuilder text = new StringBuilder();
+    text.append("words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk ");
+    for ( int i = 0; i<10; i++ ) {
+      text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk ");
+    }
+    text.append("highlight words together ");
+    for ( int i = 0; i<10; i++ ) {
+      text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk ");
+    }
+    doc.add( new Field( "text", text.toString().trim(), type ) );
+    writer.addDocument(doc);
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    // This mimics what some query parsers do to <highlight words together>
+    BooleanQuery terms = new BooleanQuery();
+    terms.add( clause( "text", "highlight" ), Occur.MUST );
+    terms.add( clause( "text", "words" ), Occur.MUST );
+    terms.add( clause( "text", "together" ), Occur.MUST );
+    // This mimics what some query parsers do to <"highlight words together">
+    BooleanQuery phrase = new BooleanQuery();
+    phrase.add( clause( "text", "highlight", "words", "together" ), Occur.MUST );
+    phrase.setBoost( 100 );
+    // Combine the required terms with the optional boosted phrase; the boost should pull the phrase fragment to the front
+    BooleanQuery query = new BooleanQuery();
+    query.add( terms, Occur.MUST );
+    query.add( phrase, Occur.SHOULD );
+    FieldQuery fieldQuery = new FieldQuery( query, reader, true, false );
+    String fragment = highlighter.getBestFragment( fieldQuery, reader, 0, "text", 100 );
+    assertEquals( "junk junk junk junk junk junk junk junk <b>highlight words together</b> junk junk junk junk junk junk junk junk", fragment );
+
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+
+  public void testCommonTermsQueryHighlight() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,  new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
     FieldType type = new FieldType(TextField.TYPE_STORED);

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1556483&r1=1556482&r2=1556483&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Wed Jan  8 10:11:27 2014
@@ -44,44 +44,55 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
 public class FieldQueryTest extends AbstractTestCase {
+  private float boost;
+
+  /**
+   * Sets boost on each call: 1 when usually() is true, otherwise random().nextFloat() * 10000.
+   */
+  private void initBoost() {
+    boost = usually() ? 1F : random().nextFloat() * 10000;
+  }
 
   public void testFlattenBoolean() throws Exception {
+    initBoost();
     BooleanQuery booleanQuery = new BooleanQuery();
-    booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
-    booleanQuery.add(new TermQuery(new Term(F, "B")), Occur.MUST);
-    booleanQuery.add(new TermQuery(new Term(F, "C")), Occur.SHOULD);
+    booleanQuery.setBoost( boost );
+    booleanQuery.add(tq("A"), Occur.MUST);
+    booleanQuery.add(tq("B"), Occur.MUST);
+    booleanQuery.add(tq("C"), Occur.SHOULD);
 
     BooleanQuery innerQuery = new BooleanQuery();
-    innerQuery.add(new TermQuery(new Term(F, "D")), Occur.MUST);
-    innerQuery.add(new TermQuery(new Term(F, "E")), Occur.MUST);
+    innerQuery.add(tq("D"), Occur.MUST);
+    innerQuery.add(tq("E"), Occur.MUST);
     booleanQuery.add(innerQuery, Occur.MUST_NOT);
 
     FieldQuery fq = new FieldQuery(booleanQuery, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
     fq.flatten(booleanQuery, reader, flatQueries);
-    assertCollectionQueries( flatQueries, tq( "A" ), tq( "B" ), tq( "C" ) );
+    assertCollectionQueries( flatQueries, tq( boost, "A" ), tq( boost, "B" ), tq( boost, "C" ) );
   }
 
   public void testFlattenDisjunctionMaxQuery() throws Exception {
+    initBoost();
     Query query = dmq( tq( "A" ), tq( "B" ), pqF( "C", "D" ) );
+    query.setBoost( boost );
     FieldQuery fq = new FieldQuery( query, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
     fq.flatten( query, reader, flatQueries );
-    assertCollectionQueries( flatQueries, tq( "A" ), tq( "B" ), pqF( "C", "D" ) );
+    assertCollectionQueries( flatQueries, tq( boost, "A" ), tq( boost, "B" ), pqF( boost, "C", "D" ) );
   }
 
   public void testFlattenTermAndPhrase() throws Exception {
+    initBoost();
     BooleanQuery booleanQuery = new BooleanQuery();
-    booleanQuery.add(new TermQuery(new Term(F, "A")), Occur.MUST);
-    PhraseQuery phraseQuery = new PhraseQuery();
-    phraseQuery.add(new Term(F, "B"));
-    phraseQuery.add(new Term(F, "C"));
-    booleanQuery.add(phraseQuery, Occur.MUST);
+    booleanQuery.setBoost( boost );
+    booleanQuery.add(tq("A"), Occur.MUST);
+    booleanQuery.add(pqF("B", "C"), Occur.MUST);
 
     FieldQuery fq = new FieldQuery(booleanQuery, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
     fq.flatten(booleanQuery, reader, flatQueries);
-    assertCollectionQueries( flatQueries, tq( "A" ), pqF( "B", "C" ) );
+    assertCollectionQueries( flatQueries, tq( boost, "A" ), pqF( boost, "B", "C" ) );
   }
 
   public void testFlattenTermAndPhrase2gram() throws Exception {
@@ -926,6 +937,7 @@ public class FieldQueryTest extends Abst
   }
   
   public void testFlattenFilteredQuery() throws Exception {
+    initBoost();
     Query query = new FilteredQuery(pqF( "A" ), new Filter() {
       @Override
       public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
@@ -933,18 +945,21 @@ public class FieldQueryTest extends Abst
         return null;
       }
     });
+    query.setBoost(boost);
     FieldQuery fq = new FieldQuery( query, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
     fq.flatten( query, reader, flatQueries );
-    assertCollectionQueries( flatQueries, tq( "A" ) );
+    assertCollectionQueries( flatQueries, tq( boost, "A" ) );
   }
   
   public void testFlattenConstantScoreQuery() throws Exception {
+    initBoost();
     Query query = new ConstantScoreQuery(pqF( "A" ));
+    query.setBoost(boost);
     FieldQuery fq = new FieldQuery( query, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
     fq.flatten( query, reader, flatQueries );
-    assertCollectionQueries( flatQueries, tq( "A" ) );
+    assertCollectionQueries( flatQueries, tq( boost, "A" ) );
   }
   
 }