You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/11 22:43:13 UTC

svn commit: r1638442 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/

Author: mikemccand
Date: Tue Nov 11 21:43:12 2014
New Revision: 1638442

URL: http://svn.apache.org/r1638442
Log:
LUCENE-6050: allow for MUST and MUST_NOT (in addition to SHOULD) context tags in Analyzing/BlendedInfixSuggester

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
    lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
    lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
    lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1638442&r1=1638441&r2=1638442&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Tue Nov 11 21:43:12 2014
@@ -77,6 +77,10 @@ New Features
   index using it will need to be rebuilt.  (Thomas
   Neidhart via Mike McCandless)
 
+* LUCENE-6050: Accept MUST and MUST_NOT (in addition to SHOULD) for
+  each context passed to Analyzing/BlendedInfixSuggester (Arcadius
+  Ahouansou, jane chang via Mike McCandless)
+
 API Changes
 
 * LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1638442&r1=1638441&r2=1638442&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Tue Nov 11 21:43:12 2014
@@ -23,6 +23,8 @@ import java.io.StringReader;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.List;
 import java.util.Set;
 
@@ -396,7 +398,22 @@ public class AnalyzingInfixSuggester ext
 
   /** Lookup, without any context. */
   public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
-    return lookup(key, null, num, allTermsRequired, doHighlight);
+    return lookup(key, (Map<BytesRef, BooleanClause.Occur>)null, num, allTermsRequired, doHighlight);
+  }
+
+  /** Lookup, with context but without booleans. Context booleans default to SHOULD,
+   *  so each suggestion must have at least one of the contexts. */
+  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+
+    if (contexts == null) {
+      return lookup(key, num, allTermsRequired, doHighlight);
+    }
+
+    Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
+    for (BytesRef context : contexts) {
+      contextInfo.put(context, BooleanClause.Occur.SHOULD);
+    }
+    return lookup(key, contextInfo, num, allTermsRequired, doHighlight);
   }
 
   /** This is called if the last token isn't ended
@@ -414,7 +431,7 @@ public class AnalyzingInfixSuggester ext
   /** Retrieve suggestions, specifying whether all terms
    *  must match ({@code allTermsRequired}) and whether the hits
    *  should be highlighted ({@code doHighlight}). */
-  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+  public List<LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
 
     if (searcherMgr == null) {
       throw new IllegalStateException("suggester was not built");
@@ -475,21 +492,35 @@ public class AnalyzingInfixSuggester ext
         }
       }
 
-      if (contexts != null) {
-        BooleanQuery sub = new BooleanQuery();
-        query.add(sub, BooleanClause.Occur.MUST);
-        for(BytesRef context : contexts) {
-          // NOTE: we "should" wrap this in
-          // ConstantScoreQuery, or maybe send this as a
-          // Filter instead to search, but since all of
-          // these are MUST'd, the change to the score won't
-          // affect the overall ranking.  Since we indexed
-          // as DOCS_ONLY, the perf should be the same
-          // either way (no freq int[] blocks to decode):
-
-          // TODO: if we had a BinaryTermField we could fix
-          // this "must be valid ut8f" limitation:
-          sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.utf8ToString())), BooleanClause.Occur.SHOULD);
+      if (contextInfo != null) {
+        
+        boolean allMustNot = true;
+        for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
+          if (entry.getValue() != BooleanClause.Occur.MUST_NOT) {
+            allMustNot = false;
+            break;
+          }
+        }
+
+        // do not make a subquery if all context booleans are must not
+        if (allMustNot == true) {
+          for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
+            query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), BooleanClause.Occur.MUST_NOT);
+          }
+
+        } else {
+          BooleanQuery sub = new BooleanQuery();
+          query.add(sub, BooleanClause.Occur.MUST);
+
+          for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
+            // NOTE: we "should" wrap this in
+            // ConstantScoreQuery, or maybe send this as a
+            // Filter instead to search.
+
+            // TODO: if we had a BinaryTermField we could fix
+            // this "must be valid ut8f" limitation:
+            sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), entry.getValue());
+          }
         }
       }
     }

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java?rev=1638442&r1=1638441&r2=1638442&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java Tue Nov 11 21:43:12 2014
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 
@@ -33,6 +34,7 @@ import org.apache.lucene.index.IndexOpti
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TopFieldDocs;
@@ -147,6 +149,12 @@ public class BlendedInfixSuggester exten
   }
 
   @Override
+  public List<Lookup.LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+    // here we multiply the number of searched element by the defined factor
+    return super.lookup(key, contextInfo, num * numFactor, allTermsRequired, doHighlight);
+  }
+
+  @Override
   protected FieldType getTextFieldType() {
     FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
     ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java?rev=1638442&r1=1638441&r2=1638442&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java Tue Nov 11 21:43:12 2014
@@ -23,8 +23,10 @@ import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -35,6 +37,7 @@ import org.apache.lucene.analysis.core.S
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.suggest.Input;
 import org.apache.lucene.search.suggest.InputArrayIterator;
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
@@ -899,7 +902,7 @@ public class AnalyzingInfixSuggesterTest
       assertTrue(result.contexts.contains(new BytesRef("foo")));
       assertTrue(result.contexts.contains(new BytesRef("bar")));
 
-      // Both suggestions have "foo" context:
+      // Both have "foo" context:
       results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo"), 10, true, true);
       assertEquals(2, results.size());
 
@@ -934,8 +937,16 @@ public class AnalyzingInfixSuggesterTest
       assertTrue(result.contexts.contains(new BytesRef("foo")));
       assertTrue(result.contexts.contains(new BytesRef("bar")));
 
-      // Only one has "baz" context:
-      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("baz"), 10, true, true);
+      // None do not have "foo" context:
+      Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
+      contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST_NOT);
+      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
+      assertEquals(0, results.size());
+
+      // Only one does not have "bar" context:
+      contextInfo.clear();
+      contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
+      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
       assertEquals(1, results.size());
 
       result = results.get(0);
@@ -947,7 +958,7 @@ public class AnalyzingInfixSuggesterTest
       assertTrue(result.contexts.contains(new BytesRef("foo")));
       assertTrue(result.contexts.contains(new BytesRef("baz")));
 
-      // Both have foo or bar:
+      // Both have "foo" or "bar" context:
       results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo", "bar"), 10, true, true);
       assertEquals(2, results.size());
 
@@ -969,6 +980,99 @@ public class AnalyzingInfixSuggesterTest
       assertTrue(result.contexts.contains(new BytesRef("foo")));
       assertTrue(result.contexts.contains(new BytesRef("bar")));
 
+      // Both have "bar" or "baz" context:
+      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("bar", "baz"), 10, true, true);
+      assertEquals(2, results.size());
+
+      result = results.get(0);
+      assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
+      assertEquals(10, result.value);
+      assertEquals(new BytesRef("foobaz"), result.payload);
+      assertNotNull(result.contexts);
+      assertEquals(2, result.contexts.size());
+      assertTrue(result.contexts.contains(new BytesRef("foo")));
+      assertTrue(result.contexts.contains(new BytesRef("baz")));
+
+      result = results.get(1);
+      assertEquals("lend me your <b>ear</b>", result.key);
+      assertEquals(8, result.value);
+      assertEquals(new BytesRef("foobar"), result.payload);
+      assertNotNull(result.contexts);
+      assertEquals(2, result.contexts.size());
+      assertTrue(result.contexts.contains(new BytesRef("foo")));
+      assertTrue(result.contexts.contains(new BytesRef("bar")));
+
+      // Only one has "foo" and "bar" context:
+      contextInfo.clear();
+      contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST);
+      contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST);
+      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
+      assertEquals(1, results.size());
+
+      result = results.get(0);
+      assertEquals("lend me your <b>ear</b>", result.key);
+      assertEquals(8, result.value);
+      assertEquals(new BytesRef("foobar"), result.payload);
+      assertNotNull(result.contexts);
+      assertEquals(2, result.contexts.size());
+      assertTrue(result.contexts.contains(new BytesRef("foo")));
+      assertTrue(result.contexts.contains(new BytesRef("bar")));
+
+      // None have "bar" and "baz" context:
+      contextInfo.clear();
+      contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST);
+      contextInfo.put(new BytesRef("baz"), BooleanClause.Occur.MUST);
+      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
+      assertEquals(0, results.size());
+
+      // None do not have "foo" and do not have "bar" context:
+      contextInfo.clear();
+      contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST_NOT);
+      contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
+      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
+      assertEquals(0, results.size());
+
+      // Both do not have "bar" and do not have "baz" context:
+      contextInfo.clear();
+      contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
+      contextInfo.put(new BytesRef("baz"), BooleanClause.Occur.MUST_NOT);
+      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("bar", "baz"), 10, true, true);
+      assertEquals(2, results.size());
+
+      result = results.get(0);
+      assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
+      assertEquals(10, result.value);
+      assertEquals(new BytesRef("foobaz"), result.payload);
+      assertNotNull(result.contexts);
+      assertEquals(2, result.contexts.size());
+      assertTrue(result.contexts.contains(new BytesRef("foo")));
+      assertTrue(result.contexts.contains(new BytesRef("baz")));
+
+      result = results.get(1);
+      assertEquals("lend me your <b>ear</b>", result.key);
+      assertEquals(8, result.value);
+      assertEquals(new BytesRef("foobar"), result.payload);
+      assertNotNull(result.contexts);
+      assertEquals(2, result.contexts.size());
+      assertTrue(result.contexts.contains(new BytesRef("foo")));
+      assertTrue(result.contexts.contains(new BytesRef("bar")));
+
+      // Only one has "foo" and does not have "bar" context:
+      contextInfo.clear();
+      contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST);
+      contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
+      results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
+      assertEquals(1, results.size());
+
+      result = results.get(0);
+      assertEquals("a penny saved is a penny <b>ear</b>ned", result.key);
+      assertEquals(10, result.value);
+      assertEquals(new BytesRef("foobaz"), result.payload);
+      assertNotNull(result.contexts);
+      assertEquals(2, result.contexts.size());
+      assertTrue(result.contexts.contains(new BytesRef("foo")));
+      assertTrue(result.contexts.contains(new BytesRef("baz")));
+
       suggester.close();
     }
   }

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java?rev=1638442&r1=1638441&r2=1638442&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java Tue Nov 11 21:43:12 2014
@@ -137,12 +137,12 @@ public class BlendedInfixSuggesterTest e
 
 
     // we don't find it for in the 2 first
-    assertEquals(2, suggester.lookup("the", null, 2, true, false).size());
+    assertEquals(2, suggester.lookup("the", 2, true, false).size());
     long w0 = getInResults(suggester, "the", ret, 2);
     assertTrue(w0 < 0);
 
     // but it's there if we search for 3 elements
-    assertEquals(3, suggester.lookup("the", null, 3, true, false).size());
+    assertEquals(3, suggester.lookup("the", 3, true, false).size());
     long w1 = getInResults(suggester, "the", ret, 3);
     assertTrue(w1 > 0);
 
@@ -188,7 +188,7 @@ public class BlendedInfixSuggesterTest e
     suggester.build(new InputArrayIterator(keys));
 
 
-    List<Lookup.LookupResult> responses = suggester.lookup("the", null, 4, true, false);
+    List<Lookup.LookupResult> responses = suggester.lookup("the", 4, true, false);
 
     for (Lookup.LookupResult response : responses) {
       System.out.println(response);
@@ -199,7 +199,7 @@ public class BlendedInfixSuggesterTest e
 
   private static long getInResults(BlendedInfixSuggester suggester, String prefix, BytesRef payload, int num) throws IOException {
 
-    List<Lookup.LookupResult> responses = suggester.lookup(prefix, null, num, true, false);
+    List<Lookup.LookupResult> responses = suggester.lookup(prefix, num, true, false);
 
     for (Lookup.LookupResult response : responses) {
       if (response.payload.equals(payload)) {