You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/13 16:38:40 UTC

svn commit: r1639372 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/benchmark/ lucene/highlighter/ lucene/highlighter/src/java/org/apache/lucene/search/highlight/ lucene/highlighter/src/test/org/apache/lucene/search/highlight/ lucene/join/ lucene/join/src/java/org/apache/lucene/search/join/

Author: mikemccand
Date: Thu Nov 13 15:38:39 2014
New Revision: 1639372

URL: http://svn.apache.org/r1639372
Log:
LUCENE-5929: highlight terms from block join queries too

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_5x/lucene/benchmark/build.xml
    lucene/dev/branches/branch_5x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_5x/lucene/highlighter/build.xml
    lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    lucene/dev/branches/branch_5x/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    lucene/dev/branches/branch_5x/lucene/join/   (props changed)
    lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
    lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1639372&r1=1639371&r2=1639372&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Thu Nov 13 15:38:39 2014
@@ -81,6 +81,9 @@ New Features
   each context passed to Analyzing/BlendedInfixSuggester (Arcadius
   Ahouansou, jane chang via Mike McCandless)
 
+* LUCENE-5929: Also extract terms to highlight from block join
+  queries. (Julie Tibshirani via Mike McCandless)
+
 API Changes
 
 * LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and

Modified: lucene/dev/branches/branch_5x/lucene/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/benchmark/build.xml?rev=1639372&r1=1639371&r2=1639372&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/benchmark/build.xml (original)
+++ lucene/dev/branches/branch_5x/lucene/benchmark/build.xml Thu Nov 13 15:38:39 2014
@@ -174,6 +174,7 @@
       <pathelement path="${spatial.jar}"/>
       <pathelement path="${queries.jar}"/>
       <pathelement path="${codecs.jar}"/>
+      <pathelement path="${join.jar}"/>
       <path refid="base.classpath"/>
       <fileset dir="lib"/>
     </path>
@@ -276,7 +277,7 @@
       <echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
     </target>
 
-    <target name="init" depends="module-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet,jar-spatial,jar-codecs"/>
+    <target name="init" depends="module-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet,jar-spatial,jar-codecs,jar-join"/>
   
     <target name="compile-test" depends="copy-alg-files-for-testing,module-build.compile-test"/>
     <target name="copy-alg-files-for-testing" description="copy .alg files as resources for testing">

Modified: lucene/dev/branches/branch_5x/lucene/highlighter/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/highlighter/build.xml?rev=1639372&r1=1639371&r2=1639372&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/highlighter/build.xml (original)
+++ lucene/dev/branches/branch_5x/lucene/highlighter/build.xml Thu Nov 13 15:38:39 2014
@@ -31,10 +31,13 @@
   <path id="classpath">
     <pathelement path="${memory.jar}"/>
     <pathelement path="${queries.jar}"/>
+    <pathelement path="${join.jar}"/>
     <path refid="base.classpath"/>
   </path>
 
-  <target name="compile-core" depends="jar-memory, common.compile-core" />
+    <target name="init" depends="module-build.init,jar-memory,jar-queries,jar-join"/>
+
+  <target name="compile-core" depends="jar-memory, common.compile-core, jar-join" />
   <target name="javadocs" depends="javadocs-memory,compile-core,check-javadocs-uptodate"
           unless="javadocs-uptodate-${name}">
     <invoke-module-javadoc>

Modified: lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1639372&r1=1639371&r2=1639372&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/branch_5x/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Thu Nov 13 15:38:39 2014
@@ -44,6 +44,8 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.*;
+import org.apache.lucene.search.join.ToChildBlockJoinQuery;
+import org.apache.lucene.search.join.ToParentBlockJoinQuery;
 import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
 import org.apache.lucene.search.spans.SpanFirstQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
@@ -154,6 +156,10 @@ public class WeightedSpanTermExtractor {
       for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
         extract(iterator.next(), terms);
       }
+    } else if (query instanceof ToParentBlockJoinQuery) {
+      extract(((ToParentBlockJoinQuery) query).getChildQuery(), terms);
+    } else if (query instanceof ToChildBlockJoinQuery) {
+      extract(((ToChildBlockJoinQuery) query).getParentQuery(), terms);
     } else if (query instanceof MultiPhraseQuery) {
       final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
       final List<Term[]> termArrays = mpq.getTermArrays();

Modified: lucene/dev/branches/branch_5x/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1639372&r1=1639371&r2=1639372&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Thu Nov 13 15:38:39 2014
@@ -21,12 +21,12 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.StringTokenizer;
-
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 
@@ -49,6 +49,11 @@ import org.apache.lucene.queries.CommonT
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
+import org.apache.lucene.search.join.BitDocIdSetCachingWrapperFilter;
+import org.apache.lucene.search.join.BitDocIdSetFilter;
+import org.apache.lucene.search.join.ScoreMode;
+import org.apache.lucene.search.join.ToChildBlockJoinQuery;
+import org.apache.lucene.search.join.ToParentBlockJoinQuery;
 import org.apache.lucene.search.spans.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -514,6 +519,62 @@ public class HighlighterTest extends Bas
 
 
   }
+  
+  public void testToParentBlockJoinQuery() throws Exception {
+    BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(
+        new QueryWrapperFilter(
+          new TermQuery(new Term(FIELD_NAME, "parent"))));
+    
+    query = new ToParentBlockJoinQuery(new TermQuery(new Term(FIELD_NAME, "child")),
+        parentFilter, ScoreMode.None);
+    searcher = newSearcher(reader);
+    hits = searcher.search(query, 100);
+    int maxNumFragmentsRequired = 2;
+    
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this, scorer);
+    
+    for (int i = 0; i < hits.totalHits; i++) {
+      String text = "child document";
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+      
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+      highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
+    }
+    
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 1);
+  }
+  
+  public void testToChildBlockJoinQuery() throws Exception {
+    BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(
+        new QueryWrapperFilter(
+          new TermQuery(new Term(FIELD_NAME, "parent"))));
+    
+    BooleanQuery booleanQuery = new BooleanQuery();
+    booleanQuery.add(new ToChildBlockJoinQuery(new TermQuery(
+        new Term(FIELD_NAME, "parent")), parentFilter, false), Occur.MUST);
+    booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "child")), Occur.MUST);
+    query = booleanQuery;
+    
+    searcher = newSearcher(reader);
+    hits = searcher.search(query, 100);
+    int maxNumFragmentsRequired = 2;
+    
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this, scorer);
+    
+    for (int i = 0; i < hits.totalHits; i++) {
+      String text = "parent document";
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+      
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+      highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
+    }
+    
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 1);
+  }
 
   public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
     PhraseQuery phraseQuery = new PhraseQuery();
@@ -1900,6 +1961,10 @@ public class HighlighterTest extends Bas
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 7));
     writer.addDocument(doc, analyzer);
 
+    Document childDoc = doc(FIELD_NAME, "child document");
+    Document parentDoc = doc(FIELD_NAME, "parent document");
+    writer.addDocuments(Arrays.asList(childDoc, parentDoc));
+    
     writer.forceMerge(1);
     writer.close();
     reader = DirectoryReader.open(ramDir);

Modified: lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1639372&r1=1639371&r2=1639372&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Thu Nov 13 15:38:39 2014
@@ -90,6 +90,11 @@ public class ToChildBlockJoinQuery exten
     return new ToChildBlockJoinWeight(this, parentQuery.createWeight(searcher), parentsFilter, doScores);
   }
 
+  /** Return our parent query. */
+  public Query getParentQuery() {
+    return parentQuery;
+  }
+
   private static class ToChildBlockJoinWeight extends Weight {
     private final Query joinQuery;
     private final Weight parentWeight;

Modified: lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1639372&r1=1639371&r2=1639372&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Thu Nov 13 15:38:39 2014
@@ -122,6 +122,11 @@ public class ToParentBlockJoinQuery exte
   public Weight createWeight(IndexSearcher searcher) throws IOException {
     return new BlockJoinWeight(this, childQuery.createWeight(searcher), parentsFilter, scoreMode);
   }
+  
+  /** Return our child query. */
+  public Query getChildQuery() {
+    return childQuery;
+  }
 
   private static class BlockJoinWeight extends Weight {
     private final Query joinQuery;