You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/11/10 19:28:27 UTC

svn commit: r1407870 - in /lucene/dev/branches/branch_4x/lucene/suggest: ./ src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java

Author: mikemccand
Date: Sat Nov 10 18:28:26 2012
New Revision: 1407870

URL: http://svn.apache.org/viewvc?rev=1407870&view=rev
Log:
Fix tie-break in test to use the analyzed form, not the surface form.

Modified:
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1407870&r1=1407869&r2=1407870&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Sat Nov 10 18:28:26 2012
@@ -31,7 +31,6 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.search.spell.TermFreqIterator;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.fst.Sort;
@@ -81,6 +80,11 @@ import org.apache.lucene.util.fst.Util;
  * variations.
  *
  * <p>
+ * When two matching suggestions have the same weight, they
+ * are tie-broken by the analyzed form.  If their analyzed
+ * form is the same then the order is undefined.
+ *
+ * <p>
  * There are some limitations:
  * <ul>
  *

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1407870&r1=1407869&r2=1407870&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Sat Nov 10 18:28:26 2012
@@ -447,6 +447,11 @@ public class AnalyzingSuggesterTest exte
         return 0;
       }
     }
+
+    @Override
+    public String toString() {
+      return surfaceForm + "/" + weight;
+    }
   }
 
   static boolean isStopChar(char ch, int numStopChars) {
@@ -525,6 +530,8 @@ public class AnalyzingSuggesterTest exte
     }
   }
 
+  private static char SEP = '\uFFFF';
+
   public void testRandom() throws Exception {
 
     int numQueries = atLeast(1000);
@@ -561,13 +568,13 @@ public class AnalyzingSuggesterTest exte
               if (token > 0) {
                 key += " ";
               }
-              if (preserveSep && analyzedKey.length() > 0 && analyzedKey.charAt(analyzedKey.length()-1) != ' ') {
-                analyzedKey += " ";
+              if (preserveSep && analyzedKey.length() > 0 && analyzedKey.charAt(analyzedKey.length()-1) != SEP) {
+                analyzedKey += SEP;
               }
               key += s;
               if (s.length() == 1 && isStopChar(s.charAt(0), numStopChars)) {
                 if (preserveSep && preserveHoles) {
-                  analyzedKey += '\u0000';
+                  analyzedKey += SEP;
                 }
               } else {
                 analyzedKey += s;
@@ -577,7 +584,7 @@ public class AnalyzingSuggesterTest exte
           }
         }
 
-        analyzedKey = analyzedKey.replaceAll("(^| )\u0000$", "");
+        analyzedKey = analyzedKey.replaceAll("(^|" + SEP + ")" + SEP + "$", "");
 
         // Don't add same surface form more than once:
         if (!seen.contains(key)) {
@@ -602,7 +609,7 @@ public class AnalyzingSuggesterTest exte
       List<TermFreq2> sorted = new ArrayList<TermFreq2>(slowCompletor);
       Collections.sort(sorted);
       for(TermFreq2 ent : sorted) {
-        System.out.println("  surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
+        System.out.println("  surface='" + ent.surfaceForm + "' analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
       }
     }
 
@@ -621,20 +628,20 @@ public class AnalyzingSuggesterTest exte
       List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence(prefix, random()), false, topN);
 
       // 2. go thru whole set to find suggestions:
-      List<LookupResult> matches = new ArrayList<LookupResult>();
+      List<TermFreq2> matches = new ArrayList<TermFreq2>();
 
       // "Analyze" the key:
       String[] tokens = prefix.split(" ");
       StringBuilder builder = new StringBuilder();
       for(int i=0;i<tokens.length;i++) {
         String token = tokens[i];
-        if (preserveSep && builder.length() > 0 && !builder.toString().endsWith(" ")) {
-          builder.append(' ');
+        if (preserveSep && builder.length() > 0 && !builder.toString().endsWith(""+SEP)) {
+          builder.append(SEP);
         }
 
         if (token.length() == 1 && isStopChar(token.charAt(0), numStopChars)) {
           if (preserveSep && preserveHoles) {
-            builder.append("\u0000");
+            builder.append(SEP);
           }
         } else {
           builder.append(token);
@@ -647,8 +654,7 @@ public class AnalyzingSuggesterTest exte
       // not tell us any trailing holes, yet ... there is an
       // issue open for this):
       while (true) {
-        String s = analyzedKey.replaceAll("(^| )\u0000$", "");
-        s = s.replaceAll("\\s+$", "");
+        String s = analyzedKey.replaceAll(SEP + "$", "");
         if (s.equals(analyzedKey)) {
           break;
         }
@@ -668,18 +674,18 @@ public class AnalyzingSuggesterTest exte
       // TODO: could be faster... but its slowCompletor for a reason
       for (TermFreq2 e : slowCompletor) {
         if (e.analyzedForm.startsWith(analyzedKey)) {
-          matches.add(new LookupResult(e.surfaceForm, e.weight));
+          matches.add(e);
         }
       }
 
       assertTrue(numStopChars > 0 || matches.size() > 0);
 
       if (matches.size() > 1) {
-        Collections.sort(matches, new Comparator<LookupResult>() {
-            public int compare(LookupResult left, LookupResult right) {
-              int cmp = Float.compare(right.value, left.value);
+        Collections.sort(matches, new Comparator<TermFreq2>() {
+            public int compare(TermFreq2 left, TermFreq2 right) {
+              int cmp = Float.compare(right.weight, left.weight);
               if (cmp == 0) {
-                return left.compareTo(right);
+                return left.analyzedForm.compareTo(right.analyzedForm);
               } else {
                 return cmp;
               }
@@ -693,8 +699,8 @@ public class AnalyzingSuggesterTest exte
 
       if (VERBOSE) {
         System.out.println("  expected:");
-        for(LookupResult lr : matches) {
-          System.out.println("    key=" + lr.key + " weight=" + lr.value);
+        for(TermFreq2 lr : matches) {
+          System.out.println("    key=" + lr.surfaceForm + " weight=" + lr.weight);
         }
 
         System.out.println("  actual:");
@@ -707,8 +713,8 @@ public class AnalyzingSuggesterTest exte
 
       for(int hit=0;hit<r.size();hit++) {
         //System.out.println("  check hit " + hit);
-        assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
-        assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
+        assertEquals(matches.get(hit).surfaceForm.toString(), r.get(hit).key.toString());
+        assertEquals(matches.get(hit).weight, r.get(hit).value, 0f);
       }
     }
   }
@@ -809,7 +815,7 @@ public class AnalyzingSuggesterTest exte
           new TermFreq("a c b", 1),
         }));
 
-    List<LookupResult> results = suggester.lookup("a", false, 4);
+    suggester.lookup("a", false, 4);
   }
 
   public void testExactFirstMissingResult() throws Exception {