You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/05/15 16:58:55 UTC
svn commit: r1679579 - in /lucene/dev/trunk/lucene: ./
suggest/src/java/org/apache/lucene/search/suggest/analyzing/
suggest/src/test/org/apache/lucene/search/suggest/analyzing/
Author: mikemccand
Date: Fri May 15 14:58:55 2015
New Revision: 1679579
URL: http://svn.apache.org/r1679579
Log:
LUCENE-6464: add expert AnalyzingInfixSuggester.lookup method that accepts arbitrary BooleanQuery to filter contexts
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1679579&r1=1679578&r2=1679579&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri May 15 14:58:55 2015
@@ -98,6 +98,11 @@ New Features
Circle, Path (buffered line string), and Polygon.
(Karl Wright via David Smiley)
+* LUCENE-6464: Add a new expert lookup method to
+ AnalyzingInfixSuggester to accept an arbitrary BooleanQuery to
+ express how contexts should be filtered. (Arcadius Ahouansou via
+ Mike McCandless)
+
Optimizations
* LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1679579&r1=1679578&r2=1679579&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Fri May 15 14:58:55 2015
@@ -24,7 +24,6 @@ import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -61,6 +60,7 @@ import org.apache.lucene.index.SortedSet
import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
@@ -407,22 +407,13 @@ public class AnalyzingInfixSuggester ext
/** Lookup, without any context. */
public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
- return lookup(key, (Map<BytesRef, BooleanClause.Occur>)null, num, allTermsRequired, doHighlight);
+ return lookup(key, (BooleanQuery)null, num, allTermsRequired, doHighlight);
}
/** Lookup, with context but without booleans. Context booleans default to SHOULD,
* so each suggestion must have at least one of the contexts. */
public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
-
- if (contexts == null) {
- return lookup(key, num, allTermsRequired, doHighlight);
- }
-
- Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
- for (BytesRef context : contexts) {
- contextInfo.put(context, BooleanClause.Occur.SHOULD);
- }
- return lookup(key, contextInfo, num, allTermsRequired, doHighlight);
+ return lookup(key, toQuery(contexts), num, allTermsRequired, doHighlight);
}
/** This is called if the last token isn't ended
@@ -441,6 +432,66 @@ public class AnalyzingInfixSuggester ext
* must match ({@code allTermsRequired}) and whether the hits
* should be highlighted ({@code doHighlight}). */
public List<LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+ return lookup(key, toQuery(contextInfo), num, allTermsRequired, doHighlight);
+ }
+
+ /**
+ * Converts a context-to-occur map into a {@link BooleanQuery} by adding one clause
+ * per map entry through {@link #addContextToQuery}.
+ *
+ * @param contextInfo the contexts mapped to the {@link BooleanClause.Occur} each should be added with
+ * @return the context filter query, or {@code null} if {@code contextInfo} is null or empty
+ */
+ private BooleanQuery toQuery(Map<BytesRef,BooleanClause.Occur> contextInfo) {
+ if (contextInfo == null || contextInfo.isEmpty()) {
+ return null;
+ }
+
+ BooleanQuery contextFilter = new BooleanQuery();
+ for (Map.Entry<BytesRef,BooleanClause.Occur> entry : contextInfo.entrySet()) {
+ addContextToQuery(contextFilter, entry.getKey(), entry.getValue());
+ }
+
+ return contextFilter;
+ }
+
+ /**
+ * Converts a set of contexts into a {@link BooleanQuery} in which every context is
+ * added as a {@link BooleanClause.Occur#SHOULD} clause through {@link #addContextToQuery}.
+ *
+ * @param contextInfo the contexts to filter by
+ * @return the context filter query, or {@code null} if {@code contextInfo} is null or empty
+ */
+ private BooleanQuery toQuery(Set<BytesRef> contextInfo) {
+ if (contextInfo == null || contextInfo.isEmpty()) {
+ return null;
+ }
+
+ BooleanQuery contextFilter = new BooleanQuery();
+ for (BytesRef context : contextInfo) {
+ addContextToQuery(contextFilter, context, BooleanClause.Occur.SHOULD);
+ }
+ return contextFilter;
+ }
+
+
+ /**
+ * Adds a {@link TermQuery} for the given context on CONTEXTS_FIELD_NAME to the given query.
+ * This method is handy as callers do not need access to internal fields such as CONTEXTS_FIELD_NAME
+ * in order to build queries. However, this may not be the best location for it.
+ *
+ * @param query the {@link BooleanQuery} the context clause is added to
+ * @param context the context
+ * @param clause one of {@link Occur}
+ */
+ public void addContextToQuery(BooleanQuery query, BytesRef context, BooleanClause.Occur clause) {
+ // NOTE: we "should" wrap this in
+ // ConstantScoreQuery, or maybe send this as a
+ // Filter instead to search.
+
+ // TODO: if we had a BinaryTermField we could fix
+ // this "must be valid utf8" limitation:
+ query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context)), clause);
+ }
+
+ /**
+ * This is an advanced method providing the capability to send down to the suggester any
+ * arbitrary lucene query to be used to filter the result of the suggester
+ *
+ * @param key the keyword being looked for
+ * @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
+ * @param num number of items to return
+ * @param allTermsRequired all searched terms must match or not
+ * @param doHighlight if true, the matching term will be highlighted in the search result
+ * @return the result of the suggester
+ * @throws IOException if there is an IO exception while reading data from the index
+ */
+ public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
if (searcherMgr == null) {
throw new IllegalStateException("suggester was not built");
@@ -454,7 +505,7 @@ public class AnalyzingInfixSuggester ext
}
BooleanQuery query;
- Set<String> matchedTokens = new HashSet<>();
+ Set<String> matchedTokens;
String prefixToken = null;
try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
@@ -496,41 +547,33 @@ public class AnalyzingInfixSuggester ext
matchedTokens.add(lastToken);
lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
}
+
if (lastQuery != null) {
query.add(lastQuery, occur);
}
}
- if (contextInfo != null) {
-
+ if (contextQuery != null) {
boolean allMustNot = true;
- for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
- if (entry.getValue() != BooleanClause.Occur.MUST_NOT) {
+ for (BooleanClause clause : contextQuery.clauses()) {
+ if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
allMustNot = false;
break;
}
}
-
- // do not make a subquery if all context booleans are must not
- if (allMustNot == true) {
- for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
- query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), BooleanClause.Occur.MUST_NOT);
+
+ if (allMustNot) {
+ //all are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
+ for (BooleanClause clause : contextQuery.clauses()) {
+ query.add(clause);
}
-
} else {
- BooleanQuery sub = new BooleanQuery();
- query.add(sub, BooleanClause.Occur.MUST);
-
- for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
- // NOTE: we "should" wrap this in
- // ConstantScoreQuery, or maybe send this as a
- // Filter instead to search.
- sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey())), entry.getValue());
- }
+ //Add contextQuery as sub-query
+ query.add(contextQuery, BooleanClause.Occur.MUST);
}
}
}
-
+
// TODO: we could allow blended sort here, combining
// weight w/ score. Now we ignore score and sort only
// by weight:
@@ -566,7 +609,7 @@ public class AnalyzingInfixSuggester ext
return results;
}
-
+
/**
* Create the results based on the search hits.
* Can be overridden by subclass to add particular behavior (e.g. weight transformation).
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java?rev=1679579&r1=1679578&r2=1679579&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java Fri May 15 14:58:55 2015
@@ -35,6 +35,7 @@ import org.apache.lucene.index.MultiDocV
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopFieldDocs;
@@ -157,6 +158,12 @@ public class BlendedInfixSuggester exten
}
@Override
+ public List<Lookup.LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+ // here we multiply the number of searched elements by the defined factor,
+ // so the parent lookup is asked for num * numFactor results
+ return super.lookup(key, contextQuery, num * numFactor, allTermsRequired, doHighlight);
+ }
+
+ @Override
protected FieldType getTextFieldType() {
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java?rev=1679579&r1=1679578&r2=1679579&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java Fri May 15 14:58:55 2015
@@ -38,6 +38,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
@@ -949,7 +950,7 @@ public class AnalyzingInfixSuggesterTest
return result;
}
- // LUCENE-5528
+ // LUCENE-5528 and LUCENE-6464
public void testBasicContext() throws Exception {
Input keys[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")),
@@ -1174,7 +1175,15 @@ public class AnalyzingInfixSuggesterTest
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("baz")));
-
+
+ //LUCENE-6464 Using the advanced context filtering by query.
+ //Note that this is just a sanity test as all the above tests run through the filter by query method
+ BooleanQuery query = new BooleanQuery();
+ suggester.addContextToQuery(query, new BytesRef("foo"), BooleanClause.Occur.MUST);
+ suggester.addContextToQuery(query, new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
+ results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), query, 10, true, true);
+ assertEquals(1, results.size());
+
suggester.close();
a.close();
}