You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/10/20 12:43:46 UTC

svn commit: r1400417 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/fst/ lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/ lucene/suggest/src/test/org/apache/lu...

Author: mikemccand
Date: Sat Oct 20 10:43:45 2012
New Revision: 1400417

URL: http://svn.apache.org/viewvc?rev=1400417&view=rev
Log:
LUCENE-4481: turn off queue pruning (it's not in general admissible)

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java?rev=1400417&r1=1400416&r2=1400417&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java Sat Oct 20 10:43:45 2012
@@ -290,6 +290,8 @@ public final class Util {
       T cost = fst.outputs.add(path.cost, path.arc.output);
       //System.out.println("  addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);
 
+      // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
+      /*
       if (queue.size() == topN) {
         FSTPath<T> bottom = queue.last();
         int comp = comparator.compare(cost, bottom.cost);
@@ -312,6 +314,7 @@ public final class Util {
       } else {
         // Queue isn't full yet, so any path we hit competes:
       }
+      */
 
       // copy over the current input to the new input
       // and add the arc.label to the end
@@ -323,9 +326,12 @@ public final class Util {
 
       queue.add(newPath);
 
+      // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
+      /*
       if (queue.size() == topN+1) {
         queue.pollLast();
-      } 
+      }
+      */
     }
 
     /** Adds all leaving arcs, including 'finished' arc, if
@@ -390,8 +396,6 @@ public final class Util {
           break;
         }
 
-        //System.out.println("  remove init path=" + path);
-
         if (path.arc.label == FST.END_LABEL) {
           //System.out.println("    empty string!  cost=" + path.cost);
           // Empty string!
@@ -400,10 +404,13 @@ public final class Util {
           continue;
         }
 
+        // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
+        /*
         if (results.size() == topN-1) {
           // Last path -- don't bother w/ queue anymore:
           queue = null;
         }
+        */
 
         //System.out.println("  path: " + path);
         

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1400417&r1=1400416&r2=1400417&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Sat Oct 20 10:43:45 2012
@@ -591,16 +591,18 @@ public class AnalyzingSuggester extends 
 
       Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
       searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
-                                                            num - results.size(),
+                                                            num,
                                                             weightComparator) {
         private final Set<BytesRef> seen = new HashSet<BytesRef>();
 
         @Override
         protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
-          
+
+          //System.out.println("ACCEPT? path=" + input);
           // Dedup: when the input analyzes to a graph we
           // can get duplicate surface forms:
           if (seen.contains(output.output2)) {
+            //System.out.println("SKIP: dup");
             return false;
           }
           seen.add(output.output2);
@@ -630,6 +632,12 @@ public class AnalyzingSuggester extends 
         LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1));
         //System.out.println("    result=" + result);
         results.add(result);
+
+        if (results.size() == num) {
+          // In the exactFirst=true case the search may
+          // produce one extra path
+          break;
+        }
       }
 
       return results;

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1400417&r1=1400416&r2=1400417&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Sat Oct 20 10:43:45 2012
@@ -803,4 +803,114 @@ public class AnalyzingSuggesterTest exte
 
     List<LookupResult> results = suggester.lookup("a", false, 4);
   }
+
+  public void testExactFirstMissingResult() throws Exception {
+
+    Analyzer a = new MockAnalyzer(random());
+
+    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
+
+    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+          new TermFreq("a", 5),
+          new TermFreq("a b", 3),
+          new TermFreq("a c", 4),
+        }));
+
+    List<LookupResult> results = suggester.lookup("a", false, 3);
+    assertEquals(3, results.size());
+    assertEquals("a", results.get(0).key);
+    assertEquals(5, results.get(0).value);
+    assertEquals("a c", results.get(1).key);
+    assertEquals(4, results.get(1).value);
+    assertEquals("a b", results.get(2).key);
+    assertEquals(3, results.get(2).value);
+  }
+
+  public void testDupSurfaceFormsMissingResults() throws Exception {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+        
+        return new TokenStreamComponents(tokenizer) {
+
+          @Override
+          public TokenStream getTokenStream() {
+            return new CannedTokenStream(new Token[] {
+                token("hairy", 1, 1),
+                token("smelly", 0, 1),
+                token("dog", 1, 1),
+              });
+          }
+         
+          @Override
+          protected void setReader(final Reader reader) throws IOException {
+          }
+        };
+      }
+    };
+
+    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
+
+    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+          new TermFreq("hambone", 6),
+          new TermFreq("nellie", 5),
+        }));
+
+    List<LookupResult> results = suggester.lookup("nellie", false, 2);
+    assertEquals(2, results.size());
+    assertEquals("hambone", results.get(0).key);
+    assertEquals(6, results.get(0).value);
+    assertEquals("nellie", results.get(1).key);
+    assertEquals(5, results.get(1).value);
+  }
+
+  public void testDupSurfaceFormsMissingResults2() throws Exception {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+        
+        return new TokenStreamComponents(tokenizer) {
+
+          int count;
+
+          @Override
+          public TokenStream getTokenStream() {
+            if (count == 0) {
+              count++;
+              return new CannedTokenStream(new Token[] {
+                  token("p", 1, 1),
+                  token("q", 1, 1),
+                  token("r", 0, 1),
+                  token("s", 0, 1),
+                });
+            } else {
+              return new CannedTokenStream(new Token[] {
+                  token("p", 1, 1),
+                });
+            }
+          }
+         
+          @Override
+          protected void setReader(final Reader reader) throws IOException {
+          }
+        };
+      }
+    };
+
+    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
+
+    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+          new TermFreq("a", 6),
+          new TermFreq("b", 5),
+        }));
+
+    List<LookupResult> results = suggester.lookup("a", false, 2);
+    assertEquals(2, results.size());
+    assertEquals("a", results.get(0).key);
+    assertEquals(6, results.get(0).value);
+    assertEquals("b", results.get(1).key);
+    assertEquals(5, results.get(1).value);
+  }
 }