You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/11/10 19:28:27 UTC
svn commit: r1407870 - in /lucene/dev/branches/branch_4x/lucene/suggest: ./
src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
Author: mikemccand
Date: Sat Nov 10 18:28:26 2012
New Revision: 1407870
URL: http://svn.apache.org/viewvc?rev=1407870&view=rev
Log:
Fix the tie-break in the test to use the analyzed form, not the surface form.
Modified:
lucene/dev/branches/branch_4x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1407870&r1=1407869&r2=1407870&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Sat Nov 10 18:28:26 2012
@@ -31,7 +31,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.fst.Sort;
@@ -81,6 +80,11 @@ import org.apache.lucene.util.fst.Util;
* variations.
*
* <p>
+ * When two matching suggestions have the same weight, they
+ * are tie-broken by the analyzed form. If their analyzed
+ * form is the same then the order is undefined.
+ *
+ * <p>
* There are some limitations:
* <ul>
*
Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1407870&r1=1407869&r2=1407870&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Sat Nov 10 18:28:26 2012
@@ -447,6 +447,11 @@ public class AnalyzingSuggesterTest exte
return 0;
}
}
+
+ @Override
+ public String toString() {
+ return surfaceForm + "/" + weight;
+ }
}
static boolean isStopChar(char ch, int numStopChars) {
@@ -525,6 +530,8 @@ public class AnalyzingSuggesterTest exte
}
}
+ private static char SEP = '\uFFFF';
+
public void testRandom() throws Exception {
int numQueries = atLeast(1000);
@@ -561,13 +568,13 @@ public class AnalyzingSuggesterTest exte
if (token > 0) {
key += " ";
}
- if (preserveSep && analyzedKey.length() > 0 && analyzedKey.charAt(analyzedKey.length()-1) != ' ') {
- analyzedKey += " ";
+ if (preserveSep && analyzedKey.length() > 0 && analyzedKey.charAt(analyzedKey.length()-1) != SEP) {
+ analyzedKey += SEP;
}
key += s;
if (s.length() == 1 && isStopChar(s.charAt(0), numStopChars)) {
if (preserveSep && preserveHoles) {
- analyzedKey += '\u0000';
+ analyzedKey += SEP;
}
} else {
analyzedKey += s;
@@ -577,7 +584,7 @@ public class AnalyzingSuggesterTest exte
}
}
- analyzedKey = analyzedKey.replaceAll("(^| )\u0000$", "");
+ analyzedKey = analyzedKey.replaceAll("(^|" + SEP + ")" + SEP + "$", "");
// Don't add same surface form more than once:
if (!seen.contains(key)) {
@@ -602,7 +609,7 @@ public class AnalyzingSuggesterTest exte
List<TermFreq2> sorted = new ArrayList<TermFreq2>(slowCompletor);
Collections.sort(sorted);
for(TermFreq2 ent : sorted) {
- System.out.println(" surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
+ System.out.println(" surface='" + ent.surfaceForm + "' analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
}
}
@@ -621,20 +628,20 @@ public class AnalyzingSuggesterTest exte
List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence(prefix, random()), false, topN);
// 2. go thru whole set to find suggestions:
- List<LookupResult> matches = new ArrayList<LookupResult>();
+ List<TermFreq2> matches = new ArrayList<TermFreq2>();
// "Analyze" the key:
String[] tokens = prefix.split(" ");
StringBuilder builder = new StringBuilder();
for(int i=0;i<tokens.length;i++) {
String token = tokens[i];
- if (preserveSep && builder.length() > 0 && !builder.toString().endsWith(" ")) {
- builder.append(' ');
+ if (preserveSep && builder.length() > 0 && !builder.toString().endsWith(""+SEP)) {
+ builder.append(SEP);
}
if (token.length() == 1 && isStopChar(token.charAt(0), numStopChars)) {
if (preserveSep && preserveHoles) {
- builder.append("\u0000");
+ builder.append(SEP);
}
} else {
builder.append(token);
@@ -647,8 +654,7 @@ public class AnalyzingSuggesterTest exte
// not tell us any trailing holes, yet ... there is an
// issue open for this):
while (true) {
- String s = analyzedKey.replaceAll("(^| )\u0000$", "");
- s = s.replaceAll("\\s+$", "");
+ String s = analyzedKey.replaceAll(SEP + "$", "");
if (s.equals(analyzedKey)) {
break;
}
@@ -668,18 +674,18 @@ public class AnalyzingSuggesterTest exte
// TODO: could be faster... but its slowCompletor for a reason
for (TermFreq2 e : slowCompletor) {
if (e.analyzedForm.startsWith(analyzedKey)) {
- matches.add(new LookupResult(e.surfaceForm, e.weight));
+ matches.add(e);
}
}
assertTrue(numStopChars > 0 || matches.size() > 0);
if (matches.size() > 1) {
- Collections.sort(matches, new Comparator<LookupResult>() {
- public int compare(LookupResult left, LookupResult right) {
- int cmp = Float.compare(right.value, left.value);
+ Collections.sort(matches, new Comparator<TermFreq2>() {
+ public int compare(TermFreq2 left, TermFreq2 right) {
+ int cmp = Float.compare(right.weight, left.weight);
if (cmp == 0) {
- return left.compareTo(right);
+ return left.analyzedForm.compareTo(right.analyzedForm);
} else {
return cmp;
}
@@ -693,8 +699,8 @@ public class AnalyzingSuggesterTest exte
if (VERBOSE) {
System.out.println(" expected:");
- for(LookupResult lr : matches) {
- System.out.println(" key=" + lr.key + " weight=" + lr.value);
+ for(TermFreq2 lr : matches) {
+ System.out.println(" key=" + lr.surfaceForm + " weight=" + lr.weight);
}
System.out.println(" actual:");
@@ -707,8 +713,8 @@ public class AnalyzingSuggesterTest exte
for(int hit=0;hit<r.size();hit++) {
//System.out.println(" check hit " + hit);
- assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
- assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
+ assertEquals(matches.get(hit).surfaceForm.toString(), r.get(hit).key.toString());
+ assertEquals(matches.get(hit).weight, r.get(hit).value, 0f);
}
}
}
@@ -809,7 +815,7 @@ public class AnalyzingSuggesterTest exte
new TermFreq("a c b", 1),
}));
- List<LookupResult> results = suggester.lookup("a", false, 4);
+ suggester.lookup("a", false, 4);
}
public void testExactFirstMissingResult() throws Exception {