You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2017/02/22 21:04:35 UTC
lucene-solr:master: LUCENE-7686: add efficient de-duping to the NRT document suggester
Repository: lucene-solr
Updated Branches:
refs/heads/master 29a5ea44a -> 4e2cf61ac
LUCENE-7686: add efficient de-duping to the NRT document suggester
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4e2cf61a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4e2cf61a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4e2cf61a
Branch: refs/heads/master
Commit: 4e2cf61ac76db33f35d3aceacaf1563a9bd5edb2
Parents: 29a5ea4
Author: Mike McCandless <mi...@apache.org>
Authored: Wed Feb 22 16:04:26 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Wed Feb 22 16:04:26 2017 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 +
.../java/org/apache/lucene/util/fst/Util.java | 80 +++---
.../suggest/document/CompletionAnalyzer.java | 2 +-
.../suggest/document/CompletionQuery.java | 2 +-
.../search/suggest/document/NRTSuggester.java | 89 ++++--
.../search/suggest/document/SuggestField.java | 2 +-
.../suggest/document/SuggestIndexSearcher.java | 7 +-
.../search/suggest/document/TopSuggestDocs.java | 19 ++
.../document/TopSuggestDocsCollector.java | 83 +++++-
.../suggest/document/TestContextQuery.java | 26 +-
.../document/TestContextSuggestField.java | 8 +-
.../document/TestFuzzyCompletionQuery.java | 6 +-
.../document/TestPrefixCompletionQuery.java | 28 +-
.../document/TestRegexCompletionQuery.java | 6 +-
.../suggest/document/TestSuggestField.java | 278 +++++++++++++++++--
15 files changed, 517 insertions(+), 123 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index c6c97fb..e71149b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -126,6 +126,10 @@ New Features
* LUCENE-7688: Add OneMergeWrappingMergePolicy class.
(Keith Laban, Christine Poerschke)
+* LUCENE-7686: The near-real-time document suggester can now
+ efficiently filter out duplicate suggestions (Uwe Schindler, Mike
+ McCandless)
+
Bug Fixes
* LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
index 341b8d0..2f83dd1 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
@@ -248,32 +248,38 @@ public final class Util {
* @lucene.experimental
*/
public static class FSTPath<T> {
+ /** Holds the last arc appended to this path */
public FST.Arc<T> arc;
- public T cost;
+ /** Holds cost plus any usage-specific output: */
+ public T output;
public final IntsRefBuilder input;
public final float boost;
public final CharSequence context;
+ // Custom int payload for consumers; the NRT suggester uses this to record if this path has already enumerated a surface form
+ public int payload;
+
/** Sole constructor */
- public FSTPath(T cost, FST.Arc<T> arc, IntsRefBuilder input) {
- this(cost, arc, input, 0, null);
+ public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input) {
+ this(output, arc, input, 0, null, -1);
}
- public FSTPath(T cost, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context) {
+ public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context, int payload) {
this.arc = new FST.Arc<T>().copyFrom(arc);
- this.cost = cost;
+ this.output = output;
this.input = input;
this.boost = boost;
this.context = context;
+ this.payload = payload;
}
- public FSTPath<T> newPath(T cost, IntsRefBuilder input) {
- return new FSTPath<>(cost, this.arc, input, this.boost, this.context);
+ public FSTPath<T> newPath(T output, IntsRefBuilder input) {
+ return new FSTPath<>(output, this.arc, input, this.boost, this.context, this.payload);
}
@Override
public String toString() {
- return "input=" + input.get() + " cost=" + cost + "context=" + context + "boost=" + boost;
+ return "input=" + input.get() + " output=" + output + " context=" + context + " boost=" + boost + " payload=" + payload;
}
}
@@ -287,7 +293,7 @@ public final class Util {
@Override
public int compare(FSTPath<T> a, FSTPath<T> b) {
- int cmp = comparator.compare(a.cost, b.cost);
+ int cmp = comparator.compare(a.output, b.output);
if (cmp == 0) {
return a.input.get().compareTo(b.input.get());
} else {
@@ -339,8 +345,7 @@ public final class Util {
assert queue != null;
- T cost = fst.outputs.add(path.cost, path.arc.output);
- //System.out.println(" addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);
+ T output = fst.outputs.add(path.output, path.arc.output);
if (queue.size() == maxQueueDepth) {
FSTPath<T> bottom = queue.last();
@@ -373,32 +378,32 @@ public final class Util {
newInput.copyInts(path.input.get());
newInput.append(path.arc.label);
- queue.add(path.newPath(cost, newInput));
-
- if (queue.size() == maxQueueDepth+1) {
- queue.pollLast();
+ FSTPath<T> newPath = path.newPath(output, newInput);
+ if (acceptPartialPath(newPath)) {
+ queue.add(newPath);
+ if (queue.size() == maxQueueDepth+1) {
+ queue.pollLast();
+ }
}
}
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException {
- addStartPaths(node, startOutput, allowEmptyString, input, 0, null);
+ addStartPaths(node, startOutput, allowEmptyString, input, 0, null, -1);
}
/** Adds all leaving arcs, including 'finished' arc, if
* the node is final, from this node into the queue. */
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input,
- float boost, CharSequence context) throws IOException {
+ float boost, CharSequence context, int payload) throws IOException {
// De-dup NO_OUTPUT since it must be a singleton:
if (startOutput.equals(fst.outputs.getNoOutput())) {
startOutput = fst.outputs.getNoOutput();
}
- FSTPath<T> path = new FSTPath<>(startOutput, node, input, boost, context);
+ FSTPath<T> path = new FSTPath<>(startOutput, node, input, boost, context, payload);
fst.readFirstTargetArc(node, path.arc, bytesReader);
- //System.out.println("add start paths");
-
// Bootstrap: find the min starting arc
while (true) {
if (allowEmptyString || path.arc.label != FST.END_LABEL) {
@@ -415,8 +420,6 @@ public final class Util {
final List<Result<T>> results = new ArrayList<>();
- //System.out.println("search topN=" + topN);
-
final BytesReader fstReader = fst.getBytesReader();
final T NO_OUTPUT = fst.outputs.getNoOutput();
@@ -430,13 +433,11 @@ public final class Util {
// For each top N path:
while (results.size() < topN) {
- //System.out.println("\nfind next path: queue.size=" + queue.size());
FSTPath<T> path;
if (queue == null) {
// Ran out of paths
- //System.out.println(" break queue=null");
break;
}
@@ -446,15 +447,18 @@ public final class Util {
if (path == null) {
// There were less than topN paths available:
- //System.out.println(" break no more paths");
break;
}
+ //System.out.println("pop path=" + path + " arc=" + path.arc.output);
+
+ if (acceptPartialPath(path) == false) {
+ continue;
+ }
if (path.arc.label == FST.END_LABEL) {
- //System.out.println(" empty string! cost=" + path.cost);
// Empty string!
path.input.setLength(path.input.length() - 1);
- results.add(new Result<>(path.input.get(), path.cost));
+ results.add(new Result<>(path.input.get(), path.output));
continue;
}
@@ -463,8 +467,6 @@ public final class Util {
queue = null;
}
- //System.out.println(" path: " + path);
-
// We take path and find its "0 output completion",
// ie, just keep traversing the first arc with
// NO_OUTPUT that we can find, since this must lead
@@ -474,13 +476,11 @@ public final class Util {
// For each input letter:
while (true) {
- //System.out.println("\n cycle path: " + path);
fst.readFirstTargetArc(path.arc, path.arc, fstReader);
// For each arc leaving this node:
boolean foundZero = false;
while(true) {
- //System.out.println(" arc=" + (char) path.arc.label + " cost=" + path.arc.output);
// tricky: instead of comparing output == 0, we must
// express it via the comparator compare(output, 0) == 0
if (comparator.compare(NO_OUTPUT, path.arc.output) == 0) {
@@ -514,18 +514,19 @@ public final class Util {
if (path.arc.label == FST.END_LABEL) {
// Add final output:
- //System.out.println(" done!: " + path);
- path.cost = fst.outputs.add(path.cost, path.arc.output);
+ path.output = fst.outputs.add(path.output, path.arc.output);
if (acceptResult(path)) {
- //System.out.println(" add result: " + path);
- results.add(new Result<>(path.input.get(), path.cost));
+ results.add(new Result<>(path.input.get(), path.output));
} else {
rejectCount++;
}
break;
} else {
path.input.append(path.arc.label);
- path.cost = fst.outputs.add(path.cost, path.arc.output);
+ path.output = fst.outputs.add(path.output, path.arc.output);
+ if (acceptPartialPath(path) == false) {
+ break;
+ }
}
}
}
@@ -533,7 +534,12 @@ public final class Util {
}
protected boolean acceptResult(FSTPath<T> path) {
- return acceptResult(path.input.get(), path.cost);
+ return acceptResult(path.input.get(), path.output);
+ }
+
+ /** Override this to prevent considering a path before it's complete */
+ protected boolean acceptPartialPath(FSTPath<T> path) {
+ return true;
}
protected boolean acceptResult(IntsRef input, T output) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
index 6366b6c..13bd392 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
@@ -81,7 +81,7 @@ public final class CompletionAnalyzer extends AnalyzerWrapper {
private final int maxGraphExpansions;
/**
- * Wraps an analyzer to convert it's output token stream to an automaton
+ * Wraps an analyzer to convert its output token stream to an automaton
*
* @param analyzer token stream to be converted to an automaton
* @param preserveSep Preserve separation between tokens when converting to an automaton
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
index 71ba15a..49fe7d0 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
@@ -34,7 +34,7 @@ import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.SEP_L
* filtered by {@link BitsProducer}. This should be used to query against any {@link SuggestField}s
* or {@link ContextSuggestField}s of documents.
* <p>
- * Use {@link SuggestIndexSearcher#suggest(CompletionQuery, int)} to execute any query
+ * Use {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)} to execute any query
* that provides a concrete implementation of this query. Example below shows using this query
* to retrieve the top 5 documents.
*
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java
index 52e4ea0..7b8981a 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java
@@ -32,12 +32,11 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
-import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProcessor.parseDocID;
import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProcessor.parseSurfaceForm;
/**
@@ -142,21 +141,74 @@ public final class NRTSuggester implements Accountable {
// maximum number of suggestions that can be collected.
final int topN = collector.getCountToCollect() * prefixPaths.size();
final int queueSize = getMaxTopNSearcherQueueSize(topN, scorer.reader.numDocs(), liveDocsRatio, scorer.filtered);
+
+ final CharsRefBuilder spare = new CharsRefBuilder();
+
Comparator<Pair<Long, BytesRef>> comparator = getComparator();
Util.TopNSearcher<Pair<Long, BytesRef>> searcher = new Util.TopNSearcher<Pair<Long, BytesRef>>(fst, topN, queueSize, comparator,
new ScoringPathComparator(scorer)) {
- private final CharsRefBuilder spare = new CharsRefBuilder();
+ private final ByteArrayDataInput scratchInput = new ByteArrayDataInput();
+
+ @Override
+ protected boolean acceptPartialPath(Util.FSTPath<Pair<Long,BytesRef>> path) {
+ if (collector.doSkipDuplicates()) {
+ // We are removing dups
+ if (path.payload == -1) {
+ // This path didn't yet see the complete surface form; let's see if it just did with the arc output we just added:
+ BytesRef arcOutput = path.arc.output.output2;
+ BytesRef output = path.output.output2;
+ for(int i=0;i<arcOutput.length;i++) {
+ if (arcOutput.bytes[arcOutput.offset + i] == payloadSep) {
+ // OK this arc that the path was just extended by contains the payloadSep, so we now have a full surface form in this path
+ path.payload = output.length - arcOutput.length + i;
+ assert output.bytes[output.offset + path.payload] == payloadSep;
+ break;
+ }
+ }
+ }
+
+ if (path.payload != -1) {
+ BytesRef output = path.output.output2;
+ spare.copyUTF8Bytes(output.bytes, output.offset, path.payload);
+ if (collector.seenSurfaceForms.contains(spare.chars(), 0, spare.length())) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
@Override
protected boolean acceptResult(Util.FSTPath<Pair<Long, BytesRef>> path) {
- int payloadSepIndex = parseSurfaceForm(path.cost.output2, payloadSep, spare);
- int docID = parseDocID(path.cost.output2, payloadSepIndex);
+ BytesRef output = path.output.output2;
+ int payloadSepIndex;
+ if (path.payload != -1) {
+ payloadSepIndex = path.payload;
+ spare.copyUTF8Bytes(output.bytes, output.offset, payloadSepIndex);
+ } else {
+ assert collector.doSkipDuplicates() == false;
+ payloadSepIndex = parseSurfaceForm(output, payloadSep, spare);
+ }
+
+ scratchInput.reset(output.bytes, output.offset + payloadSepIndex + 1, output.length - payloadSepIndex - 1);
+ int docID = scratchInput.readVInt();
+
if (!scorer.accept(docID, acceptDocs)) {
return false;
}
+ if (collector.doSkipDuplicates()) {
+ // now record that we've seen this surface form:
+ char[] key = new char[spare.length()];
+ System.arraycopy(spare.chars(), 0, key, 0, spare.length());
+ if (collector.seenSurfaceForms.contains(key)) {
+ // we already collected a higher scoring document with this key, in this segment:
+ return false;
+ }
+ collector.seenSurfaceForms.add(key);
+ }
try {
- float score = scorer.score(decode(path.cost.output1), path.boost);
+ float score = scorer.score(decode(path.output.output1), path.boost);
collector.collect(docID, spare.toCharsRef(), path.context, score);
return true;
} catch (IOException e) {
@@ -167,8 +219,20 @@ public final class NRTSuggester implements Accountable {
for (FSTUtil.Path<Pair<Long, BytesRef>> path : prefixPaths) {
scorer.weight.setNextMatch(path.input.get());
+ BytesRef output = path.output.output2;
+ int payload = -1;
+ if (collector.doSkipDuplicates()) {
+ for(int j=0;j<output.length;j++) {
+ if (output.bytes[output.offset+j] == payloadSep) {
+ // Important to cache this, else we have a possibly O(N^2) cost where N is the length of suggestions
+ payload = j;
+ break;
+ }
+ }
+ }
+
searcher.addStartPaths(path.fstNode, path.output, false, path.input, scorer.weight.boost(),
- scorer.weight.context());
+ scorer.weight.context(), payload);
}
// hits are also returned by search()
// we do not use it, instead collect at acceptResult
@@ -191,8 +255,8 @@ public final class NRTSuggester implements Accountable {
@Override
public int compare(Util.FSTPath<Pair<Long, BytesRef>> first, Util.FSTPath<Pair<Long, BytesRef>> second) {
- int cmp = Float.compare(scorer.score(decode(second.cost.output1), second.boost),
- scorer.score(decode(first.cost.output1), first.boost));
+ int cmp = Float.compare(scorer.score(decode(second.output.output1), second.boost),
+ scorer.score(decode(first.output.output1), first.boost));
return (cmp != 0) ? cmp : first.input.get().compareTo(second.input.get());
}
}
@@ -285,13 +349,6 @@ public final class NRTSuggester implements Accountable {
return surfaceFormLen;
}
- static int parseDocID(final BytesRef output, int payloadSepIndex) {
- assert payloadSepIndex != -1 : "payload sep index can not be -1";
- ByteArrayDataInput input = new ByteArrayDataInput(output.bytes, payloadSepIndex + output.offset + 1,
- output.length - (payloadSepIndex + output.offset));
- return input.readVInt();
- }
-
static BytesRef make(final BytesRef surface, int docID, int payloadSep) throws IOException {
int len = surface.length + MAX_DOC_ID_LEN_WITH_SEP;
byte[] buffer = new byte[len];
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
index 798a0b8..e5bdda9 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
@@ -47,7 +47,7 @@ import org.apache.lucene.util.BytesRef;
* document.add(new SuggestField(name, "suggestion", 4));
* </pre>
* To perform document suggestions based on the this field, use
- * {@link SuggestIndexSearcher#suggest(CompletionQuery, int)}
+ * {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)}
*
* @lucene.experimental
*/
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java
index a64afed..5f65906 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java
@@ -38,6 +38,9 @@ import org.apache.lucene.search.Weight;
*/
public class SuggestIndexSearcher extends IndexSearcher {
+ // NOTE: we do not accept an ExecutorService here, because at least the dedup
+ // logic in TopSuggestDocsCollector/NRTSuggester would not be thread safe (and maybe other things)
+
/**
* Creates a searcher with document suggest capabilities
* for <code>reader</code>.
@@ -50,8 +53,8 @@ public class SuggestIndexSearcher extends IndexSearcher {
* Returns top <code>n</code> completion hits for
* <code>query</code>
*/
- public TopSuggestDocs suggest(CompletionQuery query, int n) throws IOException {
- TopSuggestDocsCollector collector = new TopSuggestDocsCollector(n);
+ public TopSuggestDocs suggest(CompletionQuery query, int n, boolean skipDuplicates) throws IOException {
+ TopSuggestDocsCollector collector = new TopSuggestDocsCollector(n, skipDuplicates);
suggest(query, collector);
return collector.get();
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java
index 6154d29..1ffcbdc 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java
@@ -66,6 +66,25 @@ public class TopSuggestDocs extends TopDocs {
public int compareTo(SuggestScoreDoc o) {
return Lookup.CHARSEQUENCE_COMPARATOR.compare(key, o.key);
}
+
+ @Override
+ public boolean equals(Object other) {
+ if (other instanceof SuggestScoreDoc == false) {
+ return false;
+ } else {
+ return key.equals(((SuggestScoreDoc) other).key);
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return key.hashCode();
+ }
+
+ @Override
+ public String toString() {
+ return "key=" + key + " doc=" + doc + " score=" + score + " shardIndex=" + shardIndex;
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java
index d50e93b..3336896 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java
@@ -17,7 +17,12 @@
package org.apache.lucene.search.suggest.document;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.SimpleCollector;
@@ -47,9 +52,13 @@ public class TopSuggestDocsCollector extends SimpleCollector {
private final SuggestScoreDocPriorityQueue priorityQueue;
private final int num;
- /**
- * Document base offset for the current Leaf
- */
+ /** Only set if we are deduplicating hits: holds all per-segment hits until the end, when we dedup them */
+ private final List<SuggestScoreDoc> pendingResults;
+
+ /** Only set if we are deduplicating hits: holds all surface forms seen so far in the current segment */
+ final CharArraySet seenSurfaceForms;
+
+ /** Document base offset for the current Leaf */
protected int docBase;
/**
@@ -58,12 +67,24 @@ public class TopSuggestDocsCollector extends SimpleCollector {
* Collects at most <code>num</code> completions
* with corresponding document and weight
*/
- public TopSuggestDocsCollector(int num) {
+ public TopSuggestDocsCollector(int num, boolean skipDuplicates) {
if (num <= 0) {
throw new IllegalArgumentException("'num' must be > 0");
}
this.num = num;
this.priorityQueue = new SuggestScoreDocPriorityQueue(num);
+ if (skipDuplicates) {
+ seenSurfaceForms = new CharArraySet(num, false);
+ pendingResults = new ArrayList<>();
+ } else {
+ seenSurfaceForms = null;
+ pendingResults = null;
+ }
+ }
+
+ /** Returns true if duplicates are filtered out */
+ protected boolean doSkipDuplicates() {
+ return seenSurfaceForms != null;
}
/**
@@ -76,6 +97,13 @@ public class TopSuggestDocsCollector extends SimpleCollector {
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
+ if (seenSurfaceForms != null) {
+ seenSurfaceForms.clear();
+ // NOTE: this also clears the priorityQueue:
+ for (SuggestScoreDoc hit : priorityQueue.getResults()) {
+ pendingResults.add(hit);
+ }
+ }
}
/**
@@ -101,7 +129,52 @@ public class TopSuggestDocsCollector extends SimpleCollector {
* Returns at most <code>num</code> Top scoring {@link org.apache.lucene.search.suggest.document.TopSuggestDocs}s
*/
public TopSuggestDocs get() throws IOException {
- SuggestScoreDoc[] suggestScoreDocs = priorityQueue.getResults();
+
+ SuggestScoreDoc[] suggestScoreDocs;
+
+ if (seenSurfaceForms != null) {
+ // NOTE: this also clears the priorityQueue:
+ for (SuggestScoreDoc hit : priorityQueue.getResults()) {
+ pendingResults.add(hit);
+ }
+
+ // Deduplicate all hits: we already dedup'd efficiently within each segment by
+ // truncating the FST top paths search, but across segments there may still be dups:
+ seenSurfaceForms.clear();
+
+ // TODO: we could use a priority queue here to make cost O(N * log(num)) instead of O(N * log(N)), where N = O(num *
+ // numSegments), but typically numSegments is smallish and num is smallish so this won't matter much in practice:
+
+ Collections.sort(pendingResults,
+ new Comparator<SuggestScoreDoc>() {
+ @Override
+ public int compare(SuggestScoreDoc a, SuggestScoreDoc b) {
+ // sort by higher score
+ int cmp = Float.compare(b.score, a.score);
+ if (cmp == 0) {
+ // tie break by lower docID:
+ cmp = Integer.compare(a.doc, b.doc);
+ }
+ return cmp;
+ }
+ });
+
+ List<SuggestScoreDoc> hits = new ArrayList<>();
+
+ for (SuggestScoreDoc hit : pendingResults) {
+ if (seenSurfaceForms.contains(hit.key) == false) {
+ seenSurfaceForms.add(hit.key);
+ hits.add(hit);
+ if (hits.size() == num) {
+ break;
+ }
+ }
+ }
+ suggestScoreDocs = hits.toArray(new SuggestScoreDoc[0]);
+ } else {
+ suggestScoreDocs = priorityQueue.getResults();
+ }
+
if (suggestScoreDocs.length > 0) {
return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
} else {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
index 35661ee..2c5dcd8 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
@@ -89,7 +89,7 @@ public class TestContextQuery extends LuceneTestCase {
query.addContext("type2", 2);
query.addContext("type3", 3);
query.addContext("type4", 4);
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion4", "type4", 5 * 4),
new Entry("suggestion3", "type3", 6 * 3),
@@ -124,7 +124,7 @@ public class TestContextQuery extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab")));
IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
- suggestIndexSearcher.suggest(query, 4);
+ suggestIndexSearcher.suggest(query, 4, false);
});
assertTrue(expected.getMessage().contains("SuggestField"));
@@ -155,7 +155,7 @@ public class TestContextQuery extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type", 1, false);
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3),
@@ -185,7 +185,7 @@ public class TestContextQuery extends LuceneTestCase {
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type", 1);
query.addContext("typetype", 2);
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion1", "typetype", 4 * 2),
new Entry("suggestion2", "type", 3 * 1)
@@ -215,7 +215,7 @@ public class TestContextQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion_no_ctx", null, 4),
new Entry("suggestion", "type4", 1));
@@ -249,7 +249,7 @@ public class TestContextQuery extends LuceneTestCase {
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type4", 10);
query.addAllContexts();
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion4", "type4", 1 * 10),
new Entry("suggestion1", null, 4),
@@ -284,7 +284,7 @@ public class TestContextQuery extends LuceneTestCase {
query.addContext("type2", 2);
query.addContext("type3", 3);
query.addContext("type4", 4);
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion", "type1", 4 * 10),
new Entry("suggestion", "type3", 4 * 3),
@@ -321,7 +321,7 @@ public class TestContextQuery extends LuceneTestCase {
query.addContext("type1", 7);
query.addContext("type2", 6);
query.addAllContexts();
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4 * 7),
new Entry("suggestion2", "type2", 3 * 6),
@@ -357,7 +357,7 @@ public class TestContextQuery extends LuceneTestCase {
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type3", 3);
query.addContext("type4", 4);
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion3", "type3", 2 * 3),
new Entry("suggestion4", "type4", 1 * 4)
@@ -389,7 +389,7 @@ public class TestContextQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3),
@@ -426,7 +426,7 @@ public class TestContextQuery extends LuceneTestCase {
query.addContext("type2", 2);
query.addContext("type3", 3);
query.addContext("type4", 4);
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion1", "type3", 8 * 3),
new Entry("suggestion4", "type4", 5 * 4),
@@ -460,7 +460,7 @@ public class TestContextQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3),
@@ -520,7 +520,7 @@ public class TestContextQuery extends LuceneTestCase {
for (int i = 0; i < contexts.size(); i++) {
query.addContext(contexts.get(i), i + 1);
}
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest, Arrays.copyOfRange(expectedResults, 0, 4));
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
index 9f207f8..0c3b254 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
@@ -172,7 +172,7 @@ public class TestContextSuggestField extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 10);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 10, false);
assertSuggestions(suggest,
new Entry("suggestion1", 4),
new Entry("suggestion2", 3),
@@ -180,7 +180,7 @@ public class TestContextSuggestField extends LuceneTestCase {
new Entry("suggestion4", 1));
query = new PrefixCompletionQuery(analyzer, new Term("context_suggest_field", "sugg"));
- suggest = suggestIndexSearcher.suggest(query, 10);
+ suggest = suggestIndexSearcher.suggest(query, 10, false);
assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3),
@@ -212,14 +212,14 @@ public class TestContextSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(completionAnalyzer, new Term("suggest_field", "sugg")));
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3),
new Entry("suggestion3", "type3", 2),
new Entry("suggestion4", "type4", 1));
query.addContext("type1");
- suggest = suggestIndexSearcher.suggest(query, 4);
+ suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4));
reader.close();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java
index 9a773ca..40c3f88 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java
@@ -66,7 +66,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest,
new Entry("suaggestion", 4 * 2),
new Entry("suggestion", 2 * 3),
@@ -101,7 +101,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new ContextQuery(new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugge")));
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("suggestion", "type4", 4),
new Entry("suggdestion", "type4", 4),
@@ -140,7 +140,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
ContextQuery contextQuery = new ContextQuery(fuzzyQuery);
contextQuery.addContext("type1", 6);
contextQuery.addContext("type3", 2);
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5, false);
assertSuggestions(suggest,
new Entry("sduggestion", "type1", 1 * (1 + 6)),
new Entry("sugdgestion", "type3", 1 * (3 + 2))
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java
index f5bacef..515ac2d 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java
@@ -135,7 +135,7 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
- TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3);
+ TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
assertSuggestions(lookupDocs, new Entry("abcdd", 5), new Entry("abd", 4), new Entry("abc", 3));
reader.close();
@@ -165,7 +165,7 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
// if at most half of the top scoring documents have been filtered out
// the search should be admissible for a single segment
- TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertTrue(suggest.totalHits >= 1);
assertThat(suggest.scoreLookupDocs()[0].key.toString(), equalTo("abc_" + topScore));
assertThat(suggest.scoreLookupDocs()[0].score, equalTo((float) topScore));
@@ -174,14 +174,14 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
// if more than half of the top scoring documents have been filtered out
// search is not admissible, so # of suggestions requested is num instead of 1
- suggest = indexSearcher.suggest(query, num);
+ suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, new Entry("abc_0", 0));
filter = new NumericRangeBitsProducer("filter_int_fld", num - 1, num - 1);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
// if only lower scoring documents are filtered out
// search is admissible
- suggest = indexSearcher.suggest(query, 1);
+ suggest = indexSearcher.suggest(query, 1, false);
assertSuggestions(suggest, new Entry("abc_" + (num - 1), num - 1));
reader.close();
@@ -216,13 +216,13 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
// suggest without filter
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, 3);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, 3, false);
assertSuggestions(suggest, new Entry("apple", 5), new Entry("applle", 4), new Entry("apples", 3));
// suggest with filter
BitsProducer filter = new NumericRangeBitsProducer("filter_int_fld", 5, 12);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"), filter);
- suggest = indexSearcher.suggest(query, 3);
+ suggest = indexSearcher.suggest(query, 3, false);
assertSuggestions(suggest, new Entry("applle", 4), new Entry("apples", 3));
reader.close();
@@ -243,10 +243,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "fo"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, 4); // all 4
+ TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // all 4
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "foob"));
- suggest = indexSearcher.suggest(query, 4); // not the fo
+ suggest = indexSearcher.suggest(query, 4, false); // not the fo
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
reader.close();
iw.close();
@@ -266,10 +266,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "fo"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, 4); //matches all 4
+ TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); //matches all 4
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "foob"));
- suggest = indexSearcher.suggest(query, 4); // only foobar
+ suggest = indexSearcher.suggest(query, 4, false); // only foobar
assertSuggestions(suggest, new Entry("foobar", 7));
reader.close();
iw.close();
@@ -289,10 +289,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "fo"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, 4); // matches all 4
+ TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // matches all 4
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "foob"));
- suggest = indexSearcher.suggest(query, 4); // except the fo
+ suggest = indexSearcher.suggest(query, 4, false); // except the fo
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
reader.close();
iw.close();
@@ -329,10 +329,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
- assertEquals(0, indexSearcher.suggest(query, 3).totalHits);
+ assertEquals(0, indexSearcher.suggest(query, 3, false).totalHits);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
- assertSuggestions(indexSearcher.suggest(query, 3), new Entry("apples", 3));
+ assertSuggestions(indexSearcher.suggest(query, 3, false), new Entry("apples", 3));
reader.close();
iw.close();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java
index 23710e9..2dd7184 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java
@@ -67,7 +67,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
RegexCompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|w|s]s?ugg"));
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest, new Entry("wsuggestion", 4), new Entry("ssuggestion", 3),
new Entry("asuggestion", 2), new Entry("suggestion", 1));
@@ -98,7 +98,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|s][d|u|s][u|d|g]"));
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest,
new Entry("sduggestion", "type1", 5),
new Entry("sudggestion", "type2", 4),
@@ -137,7 +137,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
contextQuery.addContext("type1", 6);
contextQuery.addContext("type3", 7);
contextQuery.addAllContexts();
- TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5);
+ TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5, false);
assertSuggestions(suggest,
new Entry("sduggestion", "type1", 5 * 6),
new Entry("sugdgestion", "type3", 3 * 7),
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e2cf61a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
index fe9992d..3efb50d 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
@@ -20,7 +20,10 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -33,9 +36,9 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene70.Lucene70Codec;
-import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
@@ -122,7 +125,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
- TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3);
+ TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
assertThat(lookupDocs.totalHits, equalTo(0));
reader.close();
iw.close();
@@ -157,7 +160,7 @@ public class TestSuggestField extends LuceneTestCase {
int[] weights = new int[num];
for(int i = 0; i < num; i++) {
Document document = new Document();
- weights[i] = Math.abs(random().nextInt());
+ weights[i] = random().nextInt(Integer.MAX_VALUE);
document.add(new SuggestField("suggest_field", "abc", weights[i]));
iw.addDocument(document);
@@ -175,12 +178,230 @@ public class TestSuggestField extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc"));
- TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num);
+ TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num, false);
+ assertSuggestions(lookupDocs, expectedEntries);
+
+ reader.close();
+ iw.close();
+ }
+
+ public void testDeduplication() throws Exception {
+ Analyzer analyzer = new MockAnalyzer(random());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
+ final int num = TestUtil.nextInt(random(), 2, 20);
+ int[] weights = new int[num];
+ int bestABCWeight = Integer.MIN_VALUE;
+ int bestABDWeight = Integer.MIN_VALUE;
+ for(int i = 0; i < num; i++) {
+ Document document = new Document();
+ weights[i] = random().nextInt(Integer.MAX_VALUE);
+ String suggestValue;
+ boolean doABC;
+ if (i == 0) {
+ doABC = true;
+ } else if (i == 1) {
+ doABC = false;
+ } else {
+ doABC = random().nextBoolean();
+ }
+ if (doABC) {
+ suggestValue = "abc";
+ bestABCWeight = Math.max(bestABCWeight, weights[i]);
+ } else {
+ suggestValue = "abd";
+ bestABDWeight = Math.max(bestABDWeight, weights[i]);
+ }
+ document.add(new SuggestField("suggest_field", suggestValue, weights[i]));
+ iw.addDocument(document);
+
+ if (usually()) {
+ iw.commit();
+ }
+ }
+
+ DirectoryReader reader = iw.getReader();
+ Entry[] expectedEntries = new Entry[2];
+ if (bestABDWeight > bestABCWeight) {
+ expectedEntries[0] = new Entry("abd", bestABDWeight);
+ expectedEntries[1] = new Entry("abc", bestABCWeight);
+ } else {
+ expectedEntries[0] = new Entry("abc", bestABCWeight);
+ expectedEntries[1] = new Entry("abd", bestABDWeight);
+ }
+
+ SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
+ PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
+ TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
+ suggestIndexSearcher.suggest(query, collector);
+ TopSuggestDocs lookupDocs = collector.get();
+ assertSuggestions(lookupDocs, expectedEntries);
+
+ reader.close();
+ iw.close();
+ }
+
+ public void testExtremeDeduplication() throws Exception {
+ Analyzer analyzer = new MockAnalyzer(random());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
+ final int num = atLeast(5000);
+ int bestWeight = Integer.MIN_VALUE;
+ for(int i = 0; i < num; i++) {
+ Document document = new Document();
+ int weight = TestUtil.nextInt(random(), 10, 100);
+ bestWeight = Math.max(weight, bestWeight);
+ document.add(new SuggestField("suggest_field", "abc", weight));
+ iw.addDocument(document);
+ if (rarely()) {
+ iw.commit();
+ }
+ }
+ Document document = new Document();
+ document.add(new SuggestField("suggest_field", "abd", 7));
+ iw.addDocument(document);
+
+ if (random().nextBoolean()) {
+ iw.forceMerge(1);
+ }
+
+ DirectoryReader reader = iw.getReader();
+ Entry[] expectedEntries = new Entry[2];
+ expectedEntries[0] = new Entry("abc", bestWeight);
+ expectedEntries[1] = new Entry("abd", 7);
+
+ SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
+ PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
+ TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
+ suggestIndexSearcher.suggest(query, collector);
+ TopSuggestDocs lookupDocs = collector.get();
assertSuggestions(lookupDocs, expectedEntries);
reader.close();
iw.close();
}
+
+ private static String randomSimpleString(int numDigits, int maxLen) {
+ final int len = TestUtil.nextInt(random(), 1, maxLen);
+ final char[] chars = new char[len];
+ for(int j=0;j<len;j++) {
+ chars[j] = (char) ('a' + random().nextInt(numDigits));
+ }
+ return new String(chars);
+ }
+
+ public void testRandom() throws Exception {
+ int numDigits = TestUtil.nextInt(random(), 1, 6);
+ Set<String> keys = new HashSet<>();
+ int keyCount = TestUtil.nextInt(random(), 1, 20);
+ if (numDigits == 1) {
+ keyCount = Math.min(9, keyCount);
+ }
+ while (keys.size() < keyCount) {
+ keys.add(randomSimpleString(numDigits, 10));
+ }
+ List<String> keysList = new ArrayList<>(keys);
+
+ Analyzer analyzer = new MockAnalyzer(random());
+ IndexWriterConfig iwc = iwcWithSuggestField(analyzer, "suggest_field");
+ // we rely on docID order:
+ iwc.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+ int docCount = TestUtil.nextInt(random(), 1, 200);
+ Entry[] docs = new Entry[docCount];
+ for(int i=0;i<docCount;i++) {
+ int weight = random().nextInt(40);
+ String key = keysList.get(random().nextInt(keyCount));
+ //System.out.println("KEY: " + key);
+ docs[i] = new Entry(key, null, weight, i);
+ Document doc = new Document();
+ doc.add(new SuggestField("suggest_field", key, weight));
+ iw.addDocument(doc);
+ if (usually()) {
+ iw.commit();
+ }
+ }
+
+ DirectoryReader reader = iw.getReader();
+ SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
+
+ int iters = atLeast(200);
+ for(int iter=0;iter<iters;iter++) {
+ String prefix = randomSimpleString(numDigits, 2);
+ if (VERBOSE) {
+ System.out.println("\nTEST: prefix=" + prefix);
+ }
+
+ // slow but hopefully correct suggester:
+ List<Entry> expected = new ArrayList<>();
+ for(Entry doc : docs) {
+ if (doc.output.startsWith(prefix)) {
+ expected.add(doc);
+ }
+ }
+ Collections.sort(expected,
+ new Comparator<Entry>() {
+ @Override
+ public int compare(Entry a, Entry b) {
+ // sort by higher score:
+ int cmp = Float.compare(b.value, a.value);
+ if (cmp == 0) {
+ // tie break by smaller docID:
+ cmp = Integer.compare(a.id, b.id);
+ }
+ return cmp;
+ }
+ });
+
+ boolean dedup = random().nextBoolean();
+ if (dedup) {
+ List<Entry> deduped = new ArrayList<>();
+ Set<String> seen = new HashSet<>();
+ for(Entry entry : expected) {
+ if (seen.contains(entry.output) == false) {
+ seen.add(entry.output);
+ deduped.add(entry);
+ }
+ }
+ expected = deduped;
+ }
+
+ // TODO: re-enable the randomized topN below once tie-breaking at the topN threshold is fixed; until then use topN = docCount:
+ //int topN = TestUtil.nextInt(random(), 1, docCount+10);
+ int topN = docCount;
+
+ if (VERBOSE) {
+ if (dedup) {
+ System.out.println(" expected (dedup'd) topN=" + topN + ":");
+ } else {
+ System.out.println(" expected topN=" + topN + ":");
+ }
+ for(int i=0;i<expected.size();i++) {
+ if (i >= topN) {
+ System.out.println(" leftover: " + i + ": " + expected.get(i));
+ } else {
+ System.out.println(" " + i + ": " + expected.get(i));
+ }
+ }
+ }
+ expected = expected.subList(0, Math.min(topN, expected.size()));
+
+ PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
+ TopSuggestDocsCollector collector = new TopSuggestDocsCollector(topN, dedup);
+ searcher.suggest(query, collector);
+ TopSuggestDocs actual = collector.get();
+ if (VERBOSE) {
+ System.out.println(" actual:");
+ SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
+ for(int i=0;i<suggestScoreDocs.length;i++) {
+ System.out.println(" " + i + ": " + suggestScoreDocs[i]);
+ }
+ }
+
+ assertSuggestions(actual, expected.toArray(new Entry[expected.size()]));
+ }
+
+ reader.close();
+ iw.close();
+ }
@Test
public void testNRTDeletedDocFiltering() throws Exception {
@@ -214,7 +435,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, numLive);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false);
assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
reader.close();
@@ -248,7 +469,7 @@ public class TestSuggestField extends LuceneTestCase {
// no random access required;
// calling suggest with filter that does not match any documents should early terminate
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
- TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertThat(suggest.totalHits, equalTo(0));
reader.close();
iw.close();
@@ -276,7 +497,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertThat(suggest.totalHits, equalTo(0));
reader.close();
@@ -306,7 +527,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, 1);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
assertSuggestions(suggest, new Entry("abc_1", 1));
reader.close();
@@ -335,10 +556,10 @@ public class TestSuggestField extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("sug_field_1", "ap"));
- TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4);
+ TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggestDocs1, new Entry("apple", 4), new Entry("aples", 3));
query = new PrefixCompletionQuery(analyzer, new Term("sug_field_2", "ap"));
- TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4);
+ TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggestDocs2, new Entry("april", 3), new Entry("apartment", 2));
// check that the doc ids are consistent
@@ -372,7 +593,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, 1);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
assertSuggestions(suggest, new Entry("abc_" + num, num));
reader.close();
@@ -402,7 +623,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size());
+ TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size(), false);
assertSuggestions(suggest, entries.toArray(new Entry[entries.size()]));
reader.close();
@@ -430,7 +651,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
- TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertEquals(num, suggest.totalHits);
for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) {
String key = suggestScoreDoc.key.toString();
@@ -456,7 +677,7 @@ public class TestSuggestField extends LuceneTestCase {
for (int i = 0; i < num; i++) {
Document document = new Document();
String suggest = prefixes[i % 3] + TestUtil.randomSimpleString(random(), 10) + "_" +String.valueOf(i);
- int weight = Math.abs(random().nextInt());
+ int weight = random().nextInt(Integer.MAX_VALUE);
document.add(new SuggestField("suggest_field", suggest, weight));
mappings.put(suggest, weight);
iw.addDocument(document);
@@ -470,7 +691,7 @@ public class TestSuggestField extends LuceneTestCase {
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
for (String prefix : prefixes) {
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
- TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertTrue(suggest.totalHits > 0);
float topScore = -1;
for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) {
@@ -498,7 +719,7 @@ public class TestSuggestField extends LuceneTestCase {
for (int i = 0; i < num; i++) {
Document document = lineFileDocs.nextDoc();
String title = document.getField("title").stringValue();
- int weight = Math.abs(random().nextInt());
+ int weight = random().nextInt(Integer.MAX_VALUE);
Integer prevWeight = mappings.get(title);
if (prevWeight == null || prevWeight < weight) {
mappings.put(title, weight);
@@ -519,7 +740,7 @@ public class TestSuggestField extends LuceneTestCase {
String title = entry.getKey();
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", title));
- TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size());
+ TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size(), false);
assertTrue(suggest.totalHits > 0);
boolean matched = false;
for (ScoreDoc scoreDoc : suggest.scoreDocs) {
@@ -577,13 +798,13 @@ public class TestSuggestField extends LuceneTestCase {
try {
startingGun.await();
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_1", prefix1));
- TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+ TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, entries1);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_2", prefix2));
- suggest = indexSearcher.suggest(query, num);
+ suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, entries2);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_3", prefix3));
- suggest = indexSearcher.suggest(query, num);
+ suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, entries3);
} catch (Throwable e) {
errors.add(e);
@@ -607,28 +828,39 @@ public class TestSuggestField extends LuceneTestCase {
final String output;
final float value;
final String context;
+ final int id;
Entry(String output, float value) {
this(output, null, value);
}
Entry(String output, String context, float value) {
+ this(output, context, value, -1);
+ }
+
+ Entry(String output, String context, float value, int id) {
this.output = output;
this.value = value;
this.context = context;
+ this.id = id;
+ }
+
+ @Override
+ public String toString() {
+ return "key=" + output + " score=" + value + " context=" + context + " id=" + id;
}
}
static void assertSuggestions(TopDocs actual, Entry... expected) {
SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
- assertThat(suggestScoreDocs.length, equalTo(expected.length));
- for (int i = 0; i < suggestScoreDocs.length; i++) {
+ for (int i = 0; i < Math.min(expected.length, suggestScoreDocs.length); i++) {
SuggestScoreDoc lookupDoc = suggestScoreDocs[i];
- String msg = "Expected: " + toString(expected[i]) + " Actual: " + toString(lookupDoc);
+ String msg = "Hit " + i + ": expected: " + toString(expected[i]) + " but actual: " + toString(lookupDoc);
assertThat(msg, lookupDoc.key.toString(), equalTo(expected[i].output));
assertThat(msg, lookupDoc.score, equalTo(expected[i].value));
assertThat(msg, lookupDoc.context, equalTo(expected[i].context));
}
+ assertThat(suggestScoreDocs.length, equalTo(expected.length));
}
private static String toString(Entry expected) {