You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/05/15 17:05:54 UTC

svn commit: r1679586 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/

Author: mikemccand
Date: Fri May 15 15:05:54 2015
New Revision: 1679586

URL: http://svn.apache.org/r1679586
Log:
LUCENE-6464: add expert AnalyzingInfixSuggester.lookup method that accepts arbitrary BooleanQuery to filter contexts

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
    lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
    lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1679586&r1=1679585&r2=1679586&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Fri May 15 15:05:54 2015
@@ -69,6 +69,11 @@ New Features
   Circle, Path (buffered line string), and Polygon.
   (Karl Wright via David Smiley)
 
+* LUCENE-6464: Add a new expert lookup method to
+  AnalyzingInfixSuggester to accept an arbitrary BooleanQuery to
+  express how contexts should be filtered. (Arcadius Ahouansou via
+  Mike McCandless)
+
 Optimizations
 
 * LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1679586&r1=1679585&r2=1679586&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Fri May 15 15:05:54 2015
@@ -24,7 +24,6 @@ import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -62,6 +61,7 @@ import org.apache.lucene.index.SortedSet
 import org.apache.lucene.index.SortingMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.EarlyTerminatingSortingCollector;
@@ -443,22 +443,13 @@ public class AnalyzingInfixSuggester ext
 
   /** Lookup, without any context. */
   public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
-    return lookup(key, (Map<BytesRef, BooleanClause.Occur>)null, num, allTermsRequired, doHighlight);
+    return lookup(key, (BooleanQuery)null, num, allTermsRequired, doHighlight);
   }
 
   /** Lookup, with context but without booleans. Context booleans default to SHOULD,
    *  so each suggestion must have at least one of the contexts. */
   public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
-
-    if (contexts == null) {
-      return lookup(key, num, allTermsRequired, doHighlight);
-    }
-
-    Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
-    for (BytesRef context : contexts) {
-      contextInfo.put(context, BooleanClause.Occur.SHOULD);
-    }
-    return lookup(key, contextInfo, num, allTermsRequired, doHighlight);
+    return lookup(key, toQuery(contexts), num, allTermsRequired, doHighlight);
   }
 
   /** This is called if the last token isn't ended
@@ -477,6 +468,66 @@ public class AnalyzingInfixSuggester ext
    *  must match ({@code allTermsRequired}) and whether the hits
    *  should be highlighted ({@code doHighlight}). */
   public List<LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+      return lookup(key, toQuery(contextInfo), num, allTermsRequired, doHighlight);
+  }
+
+  private BooleanQuery toQuery(Map<BytesRef,BooleanClause.Occur> contextInfo) {
+    if (contextInfo == null || contextInfo.isEmpty()) {
+      return null;
+    }
+    
+    BooleanQuery contextFilter = new BooleanQuery();
+    for (Map.Entry<BytesRef,BooleanClause.Occur> entry : contextInfo.entrySet()) {
+      addContextToQuery(contextFilter, entry.getKey(), entry.getValue());
+    }
+    
+    return contextFilter;
+  }
+
+  private BooleanQuery toQuery(Set<BytesRef> contextInfo) {
+    if (contextInfo == null || contextInfo.isEmpty()) {
+      return null;
+    }
+    
+    BooleanQuery contextFilter = new BooleanQuery();
+    for (BytesRef context : contextInfo) {
+      addContextToQuery(contextFilter, context, BooleanClause.Occur.SHOULD);
+    }
+    return contextFilter;
+  }
+
+  
+  /**
+   * This method is handy as callers do not need access to internal fields such as CONTEXTS_FIELD_NAME in order to build queries.
+   * However, this class may not be the best location for it.
+   * 
+   * @param query an instance of {@link BooleanQuery}
+   * @param context the context
+   * @param clause one of {@link Occur}
+   */
+  public void addContextToQuery(BooleanQuery query, BytesRef context, BooleanClause.Occur clause) {
+    // NOTE: we "should" wrap this in
+    // ConstantScoreQuery, or maybe send this as a
+    // Filter instead to search.
+    
+    // TODO: if we had a BinaryTermField we could fix
+    // this "must be valid utf8" limitation:
+    query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context)), clause);
+  }
+
+  /**
+   * This is an advanced method providing the capability to send down to the suggester any 
+   * arbitrary lucene query to be used to filter the result of the suggester
+   * 
+   * @param key the keyword being looked for
+   * @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
+   * @param num number of items to return
+   * @param allTermsRequired whether all searched terms must match
+   * @param doHighlight if true, the matching term will be highlighted in the search result
+   * @return the result of the suggester
+   * @throws IOException if there is an IO exception while reading data from the index
+   */
+  public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
 
     if (searcherMgr == null) {
       throw new IllegalStateException("suggester was not built");
@@ -490,7 +541,7 @@ public class AnalyzingInfixSuggester ext
     }
 
     BooleanQuery query;
-    Set<String> matchedTokens = new HashSet<>();
+    Set<String> matchedTokens;
     String prefixToken = null;
 
     try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
@@ -532,41 +583,33 @@ public class AnalyzingInfixSuggester ext
           matchedTokens.add(lastToken);
           lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
         }
+        
         if (lastQuery != null) {
           query.add(lastQuery, occur);
         }
       }
 
-      if (contextInfo != null) {
-        
+      if (contextQuery != null) {
         boolean allMustNot = true;
-        for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
-          if (entry.getValue() != BooleanClause.Occur.MUST_NOT) {
+        for (BooleanClause clause : contextQuery.clauses()) {
+          if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
             allMustNot = false;
             break;
           }
         }
-
-        // do not make a subquery if all context booleans are must not
-        if (allMustNot == true) {
-          for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
-            query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), BooleanClause.Occur.MUST_NOT);
+        
+        if (allMustNot) {
+          //all are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
+          for (BooleanClause clause : contextQuery.clauses()) {
+            query.add(clause);
           }
-
         } else {
-          BooleanQuery sub = new BooleanQuery();
-          query.add(sub, BooleanClause.Occur.MUST);
-
-          for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
-            // NOTE: we "should" wrap this in
-            // ConstantScoreQuery, or maybe send this as a
-            // Filter instead to search.
-            sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey())), entry.getValue());
-          }
+          //Add contextQuery as sub-query
+          query.add(contextQuery, BooleanClause.Occur.MUST);
         }
       }
     }
-
+    
     // TODO: we could allow blended sort here, combining
     // weight w/ score.  Now we ignore score and sort only
     // by weight:
@@ -602,7 +645,7 @@ public class AnalyzingInfixSuggester ext
 
     return results;
   }
-
+  
   /**
    * Create the results based on the search hits.
    * Can be overridden by subclass to add particular behavior (e.g. weight transformation).

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java?rev=1679586&r1=1679585&r2=1679586&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java Fri May 15 15:05:54 2015
@@ -35,6 +35,7 @@ import org.apache.lucene.index.MultiDocV
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TopFieldDocs;
@@ -175,6 +176,12 @@ public class BlendedInfixSuggester exten
   }
 
   @Override
+  public List<Lookup.LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+    // here we multiply the number of searched elements by the defined factor
+    return super.lookup(key, contextQuery, num * numFactor, allTermsRequired, doHighlight);
+  }
+  
+  @Override
   protected FieldType getTextFieldType() {
     FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
     ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java?rev=1679586&r1=1679585&r2=1679586&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java Fri May 15 15:05:54 2015
@@ -38,6 +38,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.suggest.Input;
 import org.apache.lucene.search.suggest.InputArrayIterator;
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
@@ -949,7 +950,7 @@ public class AnalyzingInfixSuggesterTest
     return result;
   }
 
-  // LUCENE-5528
+  // LUCENE-5528 and LUCENE-6464
   public void testBasicContext() throws Exception {
     Input keys[] = new Input[] {
       new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")),
@@ -1174,7 +1175,15 @@ public class AnalyzingInfixSuggesterTest
       assertEquals(2, result.contexts.size());
       assertTrue(result.contexts.contains(new BytesRef("foo")));
       assertTrue(result.contexts.contains(new BytesRef("baz")));
-
+      
+      //LUCENE-6464 Using the advanced context filtering by query. 
+      //Note that this is just a sanity test as all the above tests run through the filter by query method
+      BooleanQuery query = new BooleanQuery();
+      suggester.addContextToQuery(query, new BytesRef("foo"), BooleanClause.Occur.MUST);
+      suggester.addContextToQuery(query, new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
+      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), query, 10, true, true);
+      assertEquals(1, results.size());
+      
       suggester.close();
       a.close();
     }