You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2013/07/30 12:45:39 UTC
svn commit: r1508382 - in /lucene/dev/trunk/lucene: CHANGES.txt
suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
Author: simonw
Date: Tue Jul 30 10:45:38 2013
New Revision: 1508382
URL: http://svn.apache.org/r1508382
Log:
LUCENE-5146: AnalyzingSuggester sort order doesn't respect the actual weight
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1508382&r1=1508381&r2=1508382&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Jul 30 10:45:38 2013
@@ -95,6 +95,11 @@ Bug Fixes
* LUCENE-5132: Spatial RecursivePrefixTree Contains predicate will throw an NPE
when there's no indexed data and maybe in other circumstances too. (David Smiley)
+* LUCENE-5146: AnalyzingSuggester sort comparator read part of the input key as the
+ weight that caused the sorter to never sort by weight first since the weight is only
+ considered if the input is equal causing the malformed weight to be identical as well.
+ (Simon Willnauer)
+
API Changes
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1508382&r1=1508381&r2=1508382&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Tue Jul 30 10:45:38 2013
@@ -349,11 +349,14 @@ public class AnalyzingSuggester extends
if (cmp != 0) {
return cmp;
}
+ readerA.skipBytes(scratchA.length);
+ readerB.skipBytes(scratchB.length);
// Next by cost:
long aCost = readerA.readInt();
long bCost = readerB.readInt();
-
+ assert decodeWeight(aCost) >= 0;
+ assert decodeWeight(bCost) >= 0;
if (aCost < bCost) {
return -1;
} else if (aCost > bCost) {
@@ -362,25 +365,18 @@ public class AnalyzingSuggester extends
// Finally by surface form:
if (hasPayloads) {
- readerA.setPosition(readerA.getPosition() + scratchA.length);
scratchA.length = readerA.readShort();
- scratchA.offset = readerA.getPosition();
- readerB.setPosition(readerB.getPosition() + scratchB.length);
scratchB.length = readerB.readShort();
+ scratchA.offset = readerA.getPosition();
scratchB.offset = readerB.getPosition();
} else {
scratchA.offset = readerA.getPosition();
- scratchA.length = a.length - scratchA.offset;
scratchB.offset = readerB.getPosition();
+ scratchA.length = a.length - scratchA.offset;
scratchB.length = b.length - scratchB.offset;
}
-
- cmp = scratchA.compareTo(scratchB);
- if (cmp != 0) {
- return cmp;
- }
-
- return 0;
+
+ return scratchA.compareTo(scratchB);
}
}
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1508382&r1=1508381&r2=1508382&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Tue Jul 30 10:45:38 2013
@@ -28,8 +28,11 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
+import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
@@ -47,12 +50,14 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.TermFreq;
import org.apache.lucene.search.suggest.TermFreqArrayIterator;
import org.apache.lucene.search.suggest.TermFreqPayload;
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -60,13 +65,16 @@ public class AnalyzingSuggesterTest exte
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
public void testKeyword() throws Exception {
- TermFreq keys[] = new TermFreq[] {
+ Iterable<TermFreq> keys = shuffle(
new TermFreq("foo", 50),
new TermFreq("bar", 10),
+ new TermFreq("barbar", 10),
new TermFreq("barbar", 12),
- new TermFreq("barbara", 6)
- };
-
+ new TermFreq("barbara", 6),
+ new TermFreq("bar", 5),
+ new TermFreq("barbara", 1)
+ );
+
AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
suggester.build(new TermFreqArrayIterator(keys));
@@ -103,12 +111,13 @@ public class AnalyzingSuggesterTest exte
}
public void testKeywordWithPayloads() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
+ Iterable<TermFreqPayload> keys = shuffle(
new TermFreqPayload("foo", 50, new BytesRef("hello")),
new TermFreqPayload("bar", 10, new BytesRef("goodbye")),
new TermFreqPayload("barbar", 12, new BytesRef("thank you")),
- new TermFreqPayload("barbara", 6, new BytesRef("for all the fish"))
- };
+ new TermFreqPayload("bar", 9, new BytesRef("should be deduplicated")),
+ new TermFreqPayload("bar", 8, new BytesRef("should also be deduplicated")),
+ new TermFreqPayload("barbara", 6, new BytesRef("for all the fish")));
AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
suggester.build(new TermFreqPayloadArrayIterator(keys));
@@ -153,6 +162,50 @@ public class AnalyzingSuggesterTest exte
}
}
+ public void testRandomRealisticKeys() throws IOException {
+ LineFileDocs lineFile = new LineFileDocs(random());
+ Map<String, Long> mapping = new HashMap<>();
+ List<TermFreq> keys = new ArrayList<>();
+
+ int howMany = atLeast(100); // this might bring up duplicates
+ for (int i = 0; i < howMany; i++) {
+ Document nextDoc = lineFile.nextDoc();
+ String title = nextDoc.getField("title").stringValue();
+ int randomWeight = random().nextInt(100);
+ keys.add(new TermFreq(title, randomWeight));
+ if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) {
+ mapping.put(title, Long.valueOf(randomWeight));
+ }
+ }
+
+ AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(new MockAnalyzer(random()));
+ analyzingSuggester.setPreservePositionIncrements(random().nextBoolean());
+ boolean doPayloads = random().nextBoolean();
+ if (doPayloads) {
+ List<TermFreqPayload> keysAndPayloads = new ArrayList<>();
+ for (TermFreq termFreq : keys) {
+ keysAndPayloads.add(new TermFreqPayload(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
+ }
+ analyzingSuggester.build(new TermFreqPayloadArrayIterator(keysAndPayloads));
+ } else {
+ analyzingSuggester.build(new TermFreqArrayIterator(keys));
+ }
+
+ for (TermFreq termFreq : keys) {
+ List<LookupResult> lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size());
+ for (LookupResult lookupResult : lookup) {
+ assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value));
+ if (doPayloads) {
+ assertEquals(lookupResult.payload.utf8ToString(), Long.toString(lookupResult.value));
+ } else {
+ assertNull(lookupResult.payload);
+ }
+ }
+ }
+
+ lineFile.close();
+ }
+
// TODO: more tests
/**
* basic "standardanalyzer" test with stopword removal
@@ -703,9 +756,9 @@ public class AnalyzingSuggesterTest exte
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a,
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1);
if (doPayloads) {
- suggester.build(new TermFreqPayloadArrayIterator(payloadKeys));
+ suggester.build(new TermFreqPayloadArrayIterator(shuffle(payloadKeys)));
} else {
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new TermFreqArrayIterator(shuffle(keys)));
}
for (String prefix : allPrefixes) {
@@ -823,15 +876,8 @@ public class AnalyzingSuggesterTest exte
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1);
-
- List<TermFreq> keys = Arrays.asList(new TermFreq[] {
- new TermFreq("a", 40),
- new TermFreq("a ", 50),
- new TermFreq(" a", 60),
- });
-
- Collections.shuffle(keys, random());
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new TermFreqArrayIterator(shuffle(new TermFreq("a", 40),
+ new TermFreq("a ", 50), new TermFreq(" a", 60))));
List<LookupResult> results = suggester.lookup("a", false, 5);
assertEquals(2, results.size());
@@ -926,10 +972,9 @@ public class AnalyzingSuggesterTest exte
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+ suggester.build(new TermFreqArrayIterator(shuffle(
new TermFreq("hambone", 6),
- new TermFreq("nellie", 5),
- }));
+ new TermFreq("nellie", 5))));
List<LookupResult> results = suggester.lookup("nellie", false, 2);
assertEquals(2, results.size());
@@ -1147,4 +1192,14 @@ public class AnalyzingSuggesterTest exte
// expected
}
}
+
+ @SafeVarargs
+ public final <T> Iterable<T> shuffle(T...values) {
+ final List<T> asList = new ArrayList<T>(values.length);
+ for (T value : values) {
+ asList.add(value);
+ }
+ Collections.shuffle(asList, random());
+ return asList;
+ }
}