You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ar...@apache.org on 2015/07/30 22:10:50 UTC
svn commit: r1693493 - in /lucene/dev/trunk/lucene: ./
suggest/src/java/org/apache/lucene/search/suggest/document/
suggest/src/test/org/apache/lucene/search/suggest/document/
Author: areek
Date: Thu Jul 30 20:10:49 2015
New Revision: 1693493
URL: http://svn.apache.org/r1693493
Log:
LUCENE-6702: Add a method to inject context values at index time in ContextSuggestField, simplify ContextQuery, and add a dedicated method to consider all contexts at query time
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1693493&r1=1693492&r2=1693493&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Jul 30 20:10:49 2015
@@ -212,6 +212,11 @@ API Changes
* LUCENE-6570: BooleanQuery is now immutable and can be built using the
BooleanQuery.Builder class. (Adrien Grand)
+* LUCENE-6702: NRTSuggester: Add a method to inject context values at index time
+ in ContextSuggestField. Simplify ContextQuery logic for extracting contexts and
+ add dedicated method to consider all context values at query time.
+ (Areek Zillur, Mike McCandless)
+
Bug fixes
* LUCENE-6500: ParallelCompositeReader did not always call
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java?rev=1693493&r1=1693492&r2=1693493&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextQuery.java Thu Jul 30 20:10:49 2015
@@ -18,7 +18,6 @@ package org.apache.lucene.search.suggest
*/
import java.io.IOException;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@@ -33,7 +32,6 @@ import org.apache.lucene.util.IntsRefBui
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.fst.Util;
/**
@@ -60,9 +58,9 @@ import org.apache.lucene.util.fst.Util;
* or {@link FuzzyCompletionQuery} query.
* </li>
* <li>
- * To suggest across all contexts with the same boost,
- * use '*' as the context in {@link #addContext(CharSequence)})}.
- * This can be combined with specific contexts with different boosts.
+ * To suggest across all contexts, use {@link #addAllContexts()}.
+ * When no context is added, the default behaviour is to suggest across
+ * all contexts.
* </li>
* <li>
* To apply the same boost to multiple contexts sharing the same prefix,
@@ -77,9 +75,11 @@ import org.apache.lucene.util.fst.Util;
* @lucene.experimental
*/
public class ContextQuery extends CompletionQuery {
- private Map<CharSequence, ContextMetaData> contexts;
+ private IntsRefBuilder scratch = new IntsRefBuilder();
+ private Map<IntsRef, ContextMetaData> contexts;
+ private boolean matchAllContexts = false;
/** Inner completion query */
- protected CompletionQuery query;
+ protected CompletionQuery innerQuery;
/**
* Constructs a context completion query that matches
@@ -94,7 +94,7 @@ public class ContextQuery extends Comple
throw new IllegalArgumentException("'query' parameter must not be of type "
+ this.getClass().getSimpleName());
}
- this.query = query;
+ this.innerQuery = query;
contexts = new HashMap<>();
}
@@ -126,20 +126,28 @@ public class ContextQuery extends Comple
+ Integer.toHexString((int) context.charAt(i))+ "] at position " + i + " is a reserved character");
}
}
- contexts.put(context, new ContextMetaData(boost, exact));
+ contexts.put(IntsRef.deepCopyOf(Util.toIntsRef(new BytesRef(context), scratch)), new ContextMetaData(boost, exact));
+ }
+
+ /**
+ * Add all contexts with a boost of 1f
+ */
+ public void addAllContexts() {
+ matchAllContexts = true;
}
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
- for (CharSequence context : contexts.keySet()) {
+ BytesRefBuilder scratch = new BytesRefBuilder();
+ for (IntsRef context : contexts.keySet()) {
if (buffer.length() != 0) {
buffer.append(",");
} else {
buffer.append("contexts");
buffer.append(":[");
}
- buffer.append(context);
+ buffer.append(Util.toBytesRef(context, scratch).utf8ToString());
ContextMetaData metaData = contexts.get(context);
if (metaData.exact == false) {
buffer.append("*");
@@ -153,52 +161,27 @@ public class ContextQuery extends Comple
buffer.append("]");
buffer.append(",");
}
- return buffer.toString() + query.toString(field);
+ return buffer.toString() + innerQuery.toString(field);
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
- IntsRefBuilder scratch = new IntsRefBuilder();
- final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
- final TreeSet<Integer> contextLengths = new TreeSet<>();
- final CompletionWeight innerWeight = ((CompletionWeight) query.createWeight(searcher, needsScores));
- Automaton contextsAutomaton = null;
- Automaton gap = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
+ final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, needsScores));
// if separators are preserved the fst contains a SEP_LABEL
// behind each gap. To have a matching automaton, we need to
// include the SEP_LABEL in the query as well
- gap = Operations.concatenate(gap, Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL)));
- final Automaton prefixAutomaton = Operations.concatenate(gap, innerWeight.getAutomaton());
- final Automaton matchAllAutomaton = new RegExp(".*").toAutomaton();
- for (Map.Entry<CharSequence, ContextMetaData> entry : contexts.entrySet()) {
- Automaton contextAutomaton;
- if (entry.getKey().equals("*")) {
- contextAutomaton = Operations.concatenate(matchAllAutomaton, prefixAutomaton);
- } else {
- BytesRef ref = new BytesRef(entry.getKey());
- ContextMetaData contextMetaData = entry.getValue();
- contextMap.put(IntsRef.deepCopyOf(Util.toIntsRef(ref, scratch)), contextMetaData.boost);
- contextLengths.add(scratch.length());
- contextAutomaton = Automata.makeString(entry.getKey().toString());
- if (contextMetaData.exact) {
- contextAutomaton = Operations.concatenate(contextAutomaton, prefixAutomaton);
- } else {
- contextAutomaton = Operations.concatenate(Arrays.asList(contextAutomaton,
- matchAllAutomaton,
- prefixAutomaton));
- }
- }
- if (contextsAutomaton == null) {
- contextsAutomaton = contextAutomaton;
- } else {
- contextsAutomaton = Operations.union(contextsAutomaton, contextAutomaton);
- }
- }
- if (contexts.size() == 0) {
- addContext("*");
- contextsAutomaton = Operations.concatenate(matchAllAutomaton, prefixAutomaton);
- }
+ Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
+ Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton());
+ Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
+
+ final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
+ final TreeSet<Integer> contextLengths = new TreeSet<>();
+ for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
+ ContextMetaData contextMetaData = entry.getValue();
+ contextMap.put(entry.getKey(), contextMetaData.boost);
+ contextLengths.add(entry.getKey().length);
+ }
int[] contextLengthArray = new int[contextLengths.size()];
final Iterator<Integer> iterator = contextLengths.descendingIterator();
for (int i = 0; iterator.hasNext(); i++) {
@@ -207,8 +190,47 @@ public class ContextQuery extends Comple
return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
}
+ private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
+ final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());
+ final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
+ if (matchAllContexts || contexts.size() == 0) {
+ return Operations.concatenate(matchAllAutomaton, sep);
+ } else {
+ Automaton contextsAutomaton = null;
+ for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
+ final ContextMetaData contextMetaData = entry.getValue();
+ final IntsRef ref = entry.getKey();
+ Automaton contextAutomaton = Automata.makeString(ref.ints, ref.offset, ref.length);
+ if (contextMetaData.exact == false) {
+ contextAutomaton = Operations.concatenate(contextAutomaton, matchAllAutomaton);
+ }
+ contextAutomaton = Operations.concatenate(contextAutomaton, sep);
+ if (contextsAutomaton == null) {
+ contextsAutomaton = contextAutomaton;
+ } else {
+ contextsAutomaton = Operations.union(contextsAutomaton, contextAutomaton);
+ }
+ }
+ return contextsAutomaton;
+ }
+ }
+
+ /**
+ * Holder for context value meta data
+ */
private static class ContextMetaData {
+
+ /**
+ * Boost associated with a
+ * context value
+ */
private final float boost;
+
+ /**
+ * flag to indicate whether the context
+ * value should be treated as an exact
+ * value or a context prefix
+ */
private final boolean exact;
private ContextMetaData(float boost, boolean exact) {
@@ -237,7 +259,7 @@ public class ContextQuery extends Comple
}
@Override
- protected void setNextMatch(IntsRef pathPrefix) {
+ protected void setNextMatch(final IntsRef pathPrefix) {
IntsRef ref = pathPrefix.clone();
// check if the pathPrefix matches any
@@ -250,44 +272,36 @@ public class ContextQuery extends Comple
if (contextMap.containsKey(ref)) {
currentBoost = contextMap.get(ref);
ref.length = pathPrefix.length;
- ref.offset = contextLength;
- while (ref.ints[ref.offset] != ContextSuggestField.CONTEXT_SEPARATOR) {
- ref.offset++;
- assert ref.offset < ref.length;
- }
- assert ref.ints[ref.offset] == ContextSuggestField.CONTEXT_SEPARATOR :
- "expected CONTEXT_SEPARATOR at offset=" + ref.offset;
- if (ref.offset > pathPrefix.offset) {
- currentContext = Util.toBytesRef(new IntsRef(pathPrefix.ints, pathPrefix.offset, ref.offset), scratch).utf8ToString();
- } else {
- currentContext = null;
- }
- ref.offset++;
- if (ref.ints[ref.offset] == CompletionAnalyzer.SEP_LABEL) {
- ref.offset++;
- }
- innerWeight.setNextMatch(ref);
+ setInnerWeight(ref, contextLength);
return;
}
}
// unknown context
ref.length = pathPrefix.length;
- currentBoost = contexts.get("*").boost;
- for (int i = pathPrefix.offset; i < pathPrefix.length; i++) {
- if (pathPrefix.ints[i] == ContextSuggestField.CONTEXT_SEPARATOR) {
- if (i > pathPrefix.offset) {
- currentContext = Util.toBytesRef(new IntsRef(pathPrefix.ints, pathPrefix.offset, i), scratch).utf8ToString();
+ currentBoost = 0f;
+ setInnerWeight(ref, 0);
+ }
+
+ private void setInnerWeight(IntsRef ref, int offset) {
+ IntsRefBuilder refBuilder = new IntsRefBuilder();
+ for (int i = offset; i < ref.length; i++) {
+ if (ref.ints[ref.offset + i] == ContextSuggestField.CONTEXT_SEPARATOR) {
+ if (i > 0) {
+ refBuilder.copyInts(ref.ints, ref.offset, i);
+ currentContext = Util.toBytesRef(refBuilder.get(), scratch).utf8ToString();
} else {
currentContext = null;
}
ref.offset = ++i;
assert ref.offset < ref.length : "input should not end with the context separator";
- if (pathPrefix.ints[i] == CompletionAnalyzer.SEP_LABEL) {
+ if (ref.ints[i] == CompletionAnalyzer.SEP_LABEL) {
ref.offset++;
assert ref.offset < ref.length : "input should not end with a context separator followed by SEP_LABEL";
}
- ref.length -= ref.offset;
- innerWeight.setNextMatch(ref);
+ ref.length = ref.length - ref.offset;
+ refBuilder.copyInts(ref.ints, ref.offset, ref.length);
+ innerWeight.setNextMatch(refBuilder.get());
+ return;
}
}
}
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java?rev=1693493&r1=1693492&r2=1693493&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/ContextSuggestField.java Thu Jul 30 20:10:49 2015
@@ -18,6 +18,7 @@ package org.apache.lucene.search.suggest
*/
import java.io.IOException;
+import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
@@ -70,26 +71,33 @@ public class ContextSuggestField extends
validate(value);
this.contexts = new HashSet<>((contexts != null) ? contexts.length : 0);
if (contexts != null) {
- for (CharSequence context : contexts) {
- validate(context);
- this.contexts.add(context);
- }
+ Collections.addAll(this.contexts, contexts);
}
}
+ /**
+ * Expert: Sub-classes can inject contexts at
+ * index-time
+ */
+ protected Iterable<CharSequence> contexts() {
+ return contexts;
+ }
+
@Override
protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
+ for (CharSequence context : contexts()) {
+ validate(context);
+ }
+ PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts());
CompletionTokenStream completionTokenStream;
if (stream instanceof CompletionTokenStream) {
completionTokenStream = (CompletionTokenStream) stream;
- completionTokenStream = new CompletionTokenStream(
- new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts),
+ completionTokenStream = new CompletionTokenStream(prefixTokenFilter,
completionTokenStream.preserveSep,
completionTokenStream.preservePositionIncrements,
completionTokenStream.maxGraphExpansions);
} else {
- completionTokenStream = new CompletionTokenStream(
- new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts));
+ completionTokenStream = new CompletionTokenStream(prefixTokenFilter);
}
return completionTokenStream;
}
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java?rev=1693493&r1=1693492&r2=1693493&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java Thu Jul 30 20:10:49 2015
@@ -251,7 +251,7 @@ public class TestContextQuery extends Lu
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type4", 10);
- query.addContext("*");
+ query.addAllContexts();
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
assertSuggestions(suggest,
new Entry("suggestion4", "type4", 1 * 10),
@@ -323,13 +323,13 @@ public class TestContextQuery extends Lu
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type1", 7);
query.addContext("type2", 6);
- query.addContext("*", 5);
+ query.addAllContexts();
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4 * 7),
new Entry("suggestion2", "type2", 3 * 6),
- new Entry("suggestion3", "type3", 2 * 5),
- new Entry("suggestion4", "type4", 1 * 5)
+ new Entry("suggestion3", "type3", 2),
+ new Entry("suggestion4", "type4", 1)
);
reader.close();
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java?rev=1693493&r1=1693492&r2=1693493&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java Thu Jul 30 20:10:49 2015
@@ -63,15 +63,23 @@ public class TestContextSuggestField ext
CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
charsRefBuilder.append("sugg");
charsRefBuilder.setCharAt(2, (char) ContextSuggestField.CONTEXT_SEPARATOR);
- try {
- new ContextSuggestField("name", "sugg", 1, charsRefBuilder.toString());
+
+ Analyzer analyzer = new MockAnalyzer(random());
+ Document document = new Document();
+ try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "name"))) {
+ document.add(new ContextSuggestField("name", "sugg", 1, charsRefBuilder.toString()));
+ iw.addDocument(document);
+ iw.commit();
fail("no exception thrown for context value containing CONTEXT_SEPARATOR:" + ContextSuggestField.CONTEXT_SEPARATOR);
} catch (IllegalArgumentException e) {
assertTrue(e.getMessage().contains("[0x1d]"));
}
+ document.clear();
- try {
- new ContextSuggestField("name", charsRefBuilder.toString(), 1, "sugg");
+ try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "name"))) {
+ document.add(new ContextSuggestField("name", charsRefBuilder.toString(), 1, "sugg"));
+ iw.addDocument(document);
+ iw.commit();
fail("no exception thrown for value containing CONTEXT_SEPARATOR:" + ContextSuggestField.CONTEXT_SEPARATOR);
} catch (IllegalArgumentException e) {
assertTrue(e.getMessage().contains("[0x1d]"));
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java?rev=1693493&r1=1693492&r2=1693493&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java Thu Jul 30 20:10:49 2015
@@ -104,11 +104,11 @@ public class TestFuzzyCompletionQuery ex
CompletionQuery query = new ContextQuery(new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugge")));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
assertSuggestions(suggest,
- new Entry("suggestion", "type4", 1 + 4),
- new Entry("suggdestion", "type4", 1 + 4),
- new Entry("sugdgestion", "type3", 1 + 3),
- new Entry("sudggestion", "type2", 1 + 2),
- new Entry("sduggestion", "type1", 1 + 1)
+ new Entry("suggestion", "type4", 4),
+ new Entry("suggdestion", "type4", 4),
+ new Entry("sugdgestion", "type3", 3),
+ new Entry("sudggestion", "type2", 2),
+ new Entry("sduggestion", "type1", 1)
);
reader.close();
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java?rev=1693493&r1=1693492&r2=1693493&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java Thu Jul 30 20:10:49 2015
@@ -137,7 +137,7 @@ public class TestRegexCompletionQuery ex
ContextQuery contextQuery = new ContextQuery(query);
contextQuery.addContext("type1", 6);
contextQuery.addContext("type3", 7);
- contextQuery.addContext("*");
+ contextQuery.addAllContexts();
TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5);
assertSuggestions(suggest,
new Entry("sduggestion", "type1", 5 * 6),