You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/10/20 12:43:46 UTC
svn commit: r1400417 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/util/fst/ lucene/suggest/
lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/
lucene/suggest/src/test/org/apache/lu...
Author: mikemccand
Date: Sat Oct 20 10:43:45 2012
New Revision: 1400417
URL: http://svn.apache.org/viewvc?rev=1400417&view=rev
Log:
LUCENE-4481: turn off queue pruning (it's not in general admissible)
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
lucene/dev/branches/branch_4x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java?rev=1400417&r1=1400416&r2=1400417&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java Sat Oct 20 10:43:45 2012
@@ -290,6 +290,8 @@ public final class Util {
T cost = fst.outputs.add(path.cost, path.arc.output);
//System.out.println(" addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);
+ // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
+ /*
if (queue.size() == topN) {
FSTPath<T> bottom = queue.last();
int comp = comparator.compare(cost, bottom.cost);
@@ -312,6 +314,7 @@ public final class Util {
} else {
// Queue isn't full yet, so any path we hit competes:
}
+ */
// copy over the current input to the new input
// and add the arc.label to the end
@@ -323,9 +326,12 @@ public final class Util {
queue.add(newPath);
+ // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
+ /*
if (queue.size() == topN+1) {
queue.pollLast();
- }
+ }
+ */
}
/** Adds all leaving arcs, including 'finished' arc, if
@@ -390,8 +396,6 @@ public final class Util {
break;
}
- //System.out.println(" remove init path=" + path);
-
if (path.arc.label == FST.END_LABEL) {
//System.out.println(" empty string! cost=" + path.cost);
// Empty string!
@@ -400,10 +404,13 @@ public final class Util {
continue;
}
+ // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
+ /*
if (results.size() == topN-1) {
// Last path -- don't bother w/ queue anymore:
queue = null;
}
+ */
//System.out.println(" path: " + path);
Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1400417&r1=1400416&r2=1400417&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Sat Oct 20 10:43:45 2012
@@ -591,16 +591,18 @@ public class AnalyzingSuggester extends
Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
- num - results.size(),
+ num,
weightComparator) {
private final Set<BytesRef> seen = new HashSet<BytesRef>();
@Override
protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
-
+
+ //System.out.println("ACCEPT? path=" + input);
// Dedup: when the input analyzes to a graph we
// can get duplicate surface forms:
if (seen.contains(output.output2)) {
+ //System.out.println("SKIP: dup");
return false;
}
seen.add(output.output2);
@@ -630,6 +632,12 @@ public class AnalyzingSuggester extends
LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1));
//System.out.println(" result=" + result);
results.add(result);
+
+ if (results.size() == num) {
+ // In the exactFirst=true case the search may
+ // produce one extra path
+ break;
+ }
}
return results;
Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1400417&r1=1400416&r2=1400417&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Sat Oct 20 10:43:45 2012
@@ -803,4 +803,114 @@ public class AnalyzingSuggesterTest exte
List<LookupResult> results = suggester.lookup("a", false, 4);
}
+
+ public void testExactFirstMissingResult() throws Exception {
+
+ Analyzer a = new MockAnalyzer(random());
+
+ AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
+
+ suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+ new TermFreq("a", 5),
+ new TermFreq("a b", 3),
+ new TermFreq("a c", 4),
+ }));
+
+ List<LookupResult> results = suggester.lookup("a", false, 3);
+ assertEquals(3, results.size());
+ assertEquals("a", results.get(0).key);
+ assertEquals(5, results.get(0).value);
+ assertEquals("a c", results.get(1).key);
+ assertEquals(4, results.get(1).value);
+ assertEquals("a b", results.get(2).key);
+ assertEquals(3, results.get(2).value);
+ }
+
+ public void testDupSurfaceFormsMissingResults() throws Exception {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+
+ return new TokenStreamComponents(tokenizer) {
+
+ @Override
+ public TokenStream getTokenStream() {
+ return new CannedTokenStream(new Token[] {
+ token("hairy", 1, 1),
+ token("smelly", 0, 1),
+ token("dog", 1, 1),
+ });
+ }
+
+ @Override
+ protected void setReader(final Reader reader) throws IOException {
+ }
+ };
+ }
+ };
+
+ AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
+
+ suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+ new TermFreq("hambone", 6),
+ new TermFreq("nellie", 5),
+ }));
+
+ List<LookupResult> results = suggester.lookup("nellie", false, 2);
+ assertEquals(2, results.size());
+ assertEquals("hambone", results.get(0).key);
+ assertEquals(6, results.get(0).value);
+ assertEquals("nellie", results.get(1).key);
+ assertEquals(5, results.get(1).value);
+ }
+
+ public void testDupSurfaceFormsMissingResults2() throws Exception {
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+
+ return new TokenStreamComponents(tokenizer) {
+
+ int count;
+
+ @Override
+ public TokenStream getTokenStream() {
+ if (count == 0) {
+ count++;
+ return new CannedTokenStream(new Token[] {
+ token("p", 1, 1),
+ token("q", 1, 1),
+ token("r", 0, 1),
+ token("s", 0, 1),
+ });
+ } else {
+ return new CannedTokenStream(new Token[] {
+ token("p", 1, 1),
+ });
+ }
+ }
+
+ @Override
+ protected void setReader(final Reader reader) throws IOException {
+ }
+ };
+ }
+ };
+
+ AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
+
+ suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+ new TermFreq("a", 6),
+ new TermFreq("b", 5),
+ }));
+
+ List<LookupResult> results = suggester.lookup("a", false, 2);
+ assertEquals(2, results.size());
+ assertEquals("a", results.get(0).key);
+ assertEquals(6, results.get(0).value);
+ assertEquals("b", results.get(1).key);
+ assertEquals(5, results.get(1).value);
+ }
}