You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/05/14 17:29:41 UTC

svn commit: r1679392 - in /lucene/dev/trunk/lucene/suggest/src: java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java

Author: mikemccand
Date: Thu May 14 15:29:41 2015
New Revision: 1679392

URL: http://svn.apache.org/r1679392
Log:
allow full binary terms for suggest contexts

Modified:
    lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
    lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java

Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1679392&r1=1679391&r2=1679392&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Thu May 14 15:29:41 2015
@@ -84,7 +84,6 @@ import org.apache.lucene.util.Accountabl
 import org.apache.lucene.util.Accountables;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
-// javadocs
 
 // TODO:
 //   - a PostingsFormat that stores super-high-freq terms as
@@ -104,9 +103,8 @@ import org.apache.lucene.util.RamUsageEs
  *  this suggester best applies when there is a strong
  *  a-priori ranking of all the suggestions.
  *
- *  <p>This suggester supports contexts, however the
- *  contexts must be valid utf8 (arbitrary binary terms will
- *  not work).
+ *  <p>This suggester supports contexts, including arbitrary binary
+ *  terms.
  *
  * @lucene.experimental */    
 
@@ -373,9 +371,7 @@ public class AnalyzingInfixSuggester ext
     }
     if (contexts != null) {
       for(BytesRef context : contexts) {
-        // TODO: if we had a BinaryTermField we could fix
-        // this "must be valid ut8f" limitation:
-        doc.add(new StringField(CONTEXTS_FIELD_NAME, context.utf8ToString(), Field.Store.NO));
+        doc.add(new StringField(CONTEXTS_FIELD_NAME, context, Field.Store.NO));
         doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
       }
     }
@@ -529,10 +525,7 @@ public class AnalyzingInfixSuggester ext
             // NOTE: we "should" wrap this in
             // ConstantScoreQuery, or maybe send this as a
             // Filter instead to search.
-
-            // TODO: if we had a BinaryTermField we could fix
-            // this "must be valid ut8f" limitation:
-            sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), entry.getValue());
+            sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey())), entry.getValue());
           }
         }
       }

Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java?rev=1679392&r1=1679391&r2=1679392&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java Thu May 14 15:29:41 2015
@@ -940,6 +940,15 @@ public class AnalyzingInfixSuggesterTest
     return result;
   }
 
+  private Set<BytesRef> asSet(byte[]... values) {
+    HashSet<BytesRef> result = new HashSet<>();
+    for(byte[] value : values) {
+      result.add(new BytesRef(value));
+    }
+
+    return result;
+  }
+
   // LUCENE-5528
   public void testBasicContext() throws Exception {
     Input keys[] = new Input[] {
@@ -1194,5 +1203,56 @@ public class AnalyzingInfixSuggesterTest
     return sb.toString();
   }
 
+  public void testBinaryContext() throws Exception {
+    byte[] context1 = new byte[4];
+    byte[] context2 = new byte[5];
+    byte[] context3 = new byte[1];
+    context3[0] = (byte) 0xff;
+
+    Input keys[] = new Input[] {
+      new Input("lend me your ear", 8, new BytesRef("foobar"), asSet(context1, context2)),
+      new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet(context1, context3))
+    };
+
+    Path tempDir = createTempDir("analyzingInfixContext");
+
+    for(int iter=0;iter<2;iter++) {
+      AnalyzingInfixSuggester suggester;
+      Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+      if (iter == 0) {
+        suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
+        suggester.build(new InputArrayIterator(keys));
+      } else {
+        // Test again, after close/reopen:
+        suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
+      }
 
+      // Both have context1:
+      List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet(context1), 10, true, true);
+      assertEquals(2, results.size());
+
+      LookupResult result = results.get(0);
+      assertEquals("a penny saved is a penny earned", result.key);
+      assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
+      assertEquals(10, result.value);
+      assertEquals(new BytesRef("foobaz"), result.payload);
+      assertNotNull(result.contexts);
+      assertEquals(2, result.contexts.size());
+      assertTrue(result.contexts.contains(new BytesRef(context1)));
+      assertTrue(result.contexts.contains(new BytesRef(context3)));
+
+      result = results.get(1);
+      assertEquals("lend me your ear", result.key);
+      assertEquals("lend me your <b>ear</b>", result.highlightKey);
+      assertEquals(8, result.value);
+      assertEquals(new BytesRef("foobar"), result.payload);
+      assertNotNull(result.contexts);
+      assertEquals(2, result.contexts.size());
+      assertTrue(result.contexts.contains(new BytesRef(context1)));
+      assertTrue(result.contexts.contains(new BytesRef(context2)));
+
+      suggester.close();
+      a.close();
+    }
+  }
 }