You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/05/14 17:29:41 UTC
svn commit: r1679392 - in /lucene/dev/trunk/lucene/suggest/src:
java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
Author: mikemccand
Date: Thu May 14 15:29:41 2015
New Revision: 1679392
URL: http://svn.apache.org/r1679392
Log:
allow full binary terms for suggest contexts
Modified:
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1679392&r1=1679391&r2=1679392&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Thu May 14 15:29:41 2015
@@ -84,7 +84,6 @@ import org.apache.lucene.util.Accountabl
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
-// javadocs
// TODO:
// - a PostingsFormat that stores super-high-freq terms as
@@ -104,9 +103,8 @@ import org.apache.lucene.util.RamUsageEs
* this suggester best applies when there is a strong
* a-priori ranking of all the suggestions.
*
- * <p>This suggester supports contexts, however the
- * contexts must be valid utf8 (arbitrary binary terms will
- * not work).
+ * <p>This suggester supports contexts, including arbitrary binary
+ * terms.
*
* @lucene.experimental */
@@ -373,9 +371,7 @@ public class AnalyzingInfixSuggester ext
}
if (contexts != null) {
for(BytesRef context : contexts) {
- // TODO: if we had a BinaryTermField we could fix
- // this "must be valid ut8f" limitation:
- doc.add(new StringField(CONTEXTS_FIELD_NAME, context.utf8ToString(), Field.Store.NO));
+ doc.add(new StringField(CONTEXTS_FIELD_NAME, context, Field.Store.NO));
doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
}
}
@@ -529,10 +525,7 @@ public class AnalyzingInfixSuggester ext
// NOTE: we "should" wrap this in
// ConstantScoreQuery, or maybe send this as a
// Filter instead to search.
-
- // TODO: if we had a BinaryTermField we could fix
- // this "must be valid ut8f" limitation:
- sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), entry.getValue());
+ sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey())), entry.getValue());
}
}
}
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java?rev=1679392&r1=1679391&r2=1679392&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java Thu May 14 15:29:41 2015
@@ -940,6 +940,15 @@ public class AnalyzingInfixSuggesterTest
return result;
}
+ private Set<BytesRef> asSet(byte[]... values) {
+ HashSet<BytesRef> result = new HashSet<>();
+ for(byte[] value : values) {
+ result.add(new BytesRef(value));
+ }
+
+ return result;
+ }
+
// LUCENE-5528
public void testBasicContext() throws Exception {
Input keys[] = new Input[] {
@@ -1194,5 +1203,56 @@ public class AnalyzingInfixSuggesterTest
return sb.toString();
}
+ public void testBinaryContext() throws Exception {
+ byte[] context1 = new byte[4];
+ byte[] context2 = new byte[5];
+ byte[] context3 = new byte[1];
+ context3[0] = (byte) 0xff;
+
+ Input keys[] = new Input[] {
+ new Input("lend me your ear", 8, new BytesRef("foobar"), asSet(context1, context2)),
+ new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet(context1, context3))
+ };
+
+ Path tempDir = createTempDir("analyzingInfixContext");
+
+ for(int iter=0;iter<2;iter++) {
+ AnalyzingInfixSuggester suggester;
+ Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ if (iter == 0) {
+ suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
+ suggester.build(new InputArrayIterator(keys));
+ } else {
+ // Test again, after close/reopen:
+ suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
+ }
+ // Both have context1:
+ List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet(context1), 10, true, true);
+ assertEquals(2, results.size());
+
+ LookupResult result = results.get(0);
+ assertEquals("a penny saved is a penny earned", result.key);
+ assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
+ assertEquals(10, result.value);
+ assertEquals(new BytesRef("foobaz"), result.payload);
+ assertNotNull(result.contexts);
+ assertEquals(2, result.contexts.size());
+ assertTrue(result.contexts.contains(new BytesRef(context1)));
+ assertTrue(result.contexts.contains(new BytesRef(context3)));
+
+ result = results.get(1);
+ assertEquals("lend me your ear", result.key);
+ assertEquals("lend me your <b>ear</b>", result.highlightKey);
+ assertEquals(8, result.value);
+ assertEquals(new BytesRef("foobar"), result.payload);
+ assertNotNull(result.contexts);
+ assertEquals(2, result.contexts.size());
+ assertTrue(result.contexts.contains(new BytesRef(context1)));
+ assertTrue(result.contexts.contains(new BytesRef(context2)));
+
+ suggester.close();
+ a.close();
+ }
+ }
}