You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/02/24 10:49:40 UTC
svn commit: r1293148 - in /lucene/dev/trunk:
lucene/core/src/java/org/apache/lucene/codecs/simpletext/
lucene/core/src/java/org/apache/lucene/index/
lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/java/org/apache/lucene/util/ lucene/test...
Author: simonw
Date: Fri Feb 24 09:49:39 2012
New Revision: 1293148
URL: http://svn.apache.org/viewvc?rev=1293148&view=rev
Log:
LUCENE-3807: Clean up Suggest API
Removed:
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java
Modified:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java
lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java
lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java
lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java
lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Fri Feb 24 09:49:39 2012
@@ -387,7 +387,7 @@ public class SimpleTextTermVectorsReader
}
@Override
- public Comparator<BytesRef> getComparator() throws IOException {
+ public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Fri Feb 24 09:49:39 2012
@@ -654,7 +654,7 @@ public class DocTermOrds {
}
@Override
- public Comparator<BytesRef> getComparator() throws IOException {
+ public Comparator<BytesRef> getComparator() {
return termsEnum.getComparator();
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Fri Feb 24 09:49:39 2012
@@ -174,7 +174,7 @@ public class FilterAtomicReader extends
}
@Override
- public Comparator<BytesRef> getComparator() throws IOException {
+ public Comparator<BytesRef> getComparator() {
return in.getComparator();
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java Fri Feb 24 09:49:39 2012
@@ -122,7 +122,7 @@ public abstract class FilteredTermsEnum
}
@Override
- public Comparator<BytesRef> getComparator() throws IOException {
+ public Comparator<BytesRef> getComparator() {
return tenum.getComparator();
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java Fri Feb 24 09:49:39 2012
@@ -179,13 +179,6 @@ public abstract class TermsEnum implemen
}
};
}
-
- /** Return the {@link BytesRef} Comparator used to sort
- * terms provided by the iterator. This may return
- * null if there are no terms. Callers may invoke this
- * method many times, so it's best to cache a single
- * instance & reuse it. */
- public abstract Comparator<BytesRef> getComparator() throws IOException;
/** An empty TermsEnum for quickly returning an empty instance e.g.
* in {@link org.apache.lucene.search.MultiTermQuery}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Fri Feb 24 09:49:39 2012
@@ -1052,7 +1052,7 @@ class FieldCacheImpl implements FieldCac
}
@Override
- public Comparator<BytesRef> getComparator() throws IOException {
+ public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Fri Feb 24 09:49:39 2012
@@ -287,7 +287,7 @@ public final class FuzzyTermsEnum extend
}
@Override
- public Comparator<BytesRef> getComparator() throws IOException {
+ public Comparator<BytesRef> getComparator() {
return actualEnum.getComparator();
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRef.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRef.java Fri Feb 24 09:49:39 2012
@@ -233,13 +233,7 @@ public final class BytesRef implements C
final byte[] bBytes = b.bytes;
int bUpto = b.offset;
- final int aStop;
- if (a.length < b.length) {
- aStop = aUpto + a.length;
- } else {
- aStop = aUpto + b.length;
- }
-
+ final int aStop = aUpto + Math.min(a.length, b.length);
while(aUpto < aStop) {
int aByte = aBytes[aUpto++] & 0xff;
int bByte = bBytes[bUpto++] & 0xff;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java Fri Feb 24 09:49:39 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/
import java.io.IOException;
+import java.util.Comparator;
/**
* A simple iterator interface for {@link BytesRef} iteration
@@ -40,6 +41,14 @@ public interface BytesRefIterator {
*/
public BytesRef next() throws IOException;
+ /**
+ * Return the {@link BytesRef} Comparator used to sort terms provided by the
+ * iterator. This may return null if there are no items or the iterator is not
+ * sorted. Callers may invoke this method many times, so it's best to cache a
+ * single instance & reuse it.
+ */
+ public Comparator<BytesRef> getComparator();
+
public final static class EmptyBytesRefIterator implements BytesRefIterator {
@Override
@@ -47,6 +56,10 @@ public interface BytesRefIterator {
return null;
}
+ public Comparator<BytesRef> getComparator() {
+ return null;
+ }
+
}
}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Fri Feb 24 09:49:39 2012
@@ -26,6 +26,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.lang.reflect.Method;
+import java.nio.CharBuffer;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
@@ -707,4 +708,23 @@ public class _TestUtil {
}
return termsEnum.docs(liveDocs, null, needsFreqs);
}
+
+ public static CharSequence stringToCharSequence(String string, Random random) {
+ return bytesToCharSequence(new BytesRef(string), random);
+ }
+
+ public static CharSequence bytesToCharSequence(BytesRef ref, Random random) {
+ switch(random.nextInt(5)) {
+ case 4:
+ CharsRef chars = new CharsRef(ref.length);
+ UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, chars);
+ return chars;
+ case 3:
+ return CharBuffer.wrap(ref.utf8ToString());
+ default:
+ return ref.utf8ToString();
+ }
+
+ }
+
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java Fri Feb 24 09:49:39 2012
@@ -16,6 +16,7 @@ package org.apache.lucene.search.spell;
* limitations under the License.
*/
+import java.io.IOException;
import org.apache.lucene.util.BytesRefIterator;
/**
@@ -30,5 +31,5 @@ public interface Dictionary {
* Return all words present in the dictionary
* @return Iterator
*/
- BytesRefIterator getWordsIterator();
+ BytesRefIterator getWordsIterator() throws IOException;
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java Fri Feb 24 09:49:39 2012
@@ -19,16 +19,13 @@ package org.apache.lucene.search.spell;
import java.io.IOException;
import java.util.Comparator;
-import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.BytesRefIterator;
-import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.UnicodeUtil;
/**
* HighFrequencyDictionary: terms taken from the given field
@@ -44,7 +41,6 @@ public class HighFrequencyDictionary imp
private IndexReader reader;
private String field;
private float thresh;
- private final CharsRef spare = new CharsRef();
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
this.reader = reader;
@@ -52,66 +48,55 @@ public class HighFrequencyDictionary imp
this.thresh = thresh;
}
- public final BytesRefIterator getWordsIterator() {
+ public final BytesRefIterator getWordsIterator() throws IOException {
return new HighFrequencyIterator();
}
- final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
+ final class HighFrequencyIterator implements TermFreqIterator {
private final BytesRef spare = new BytesRef();
private final TermsEnum termsEnum;
private int minNumDocs;
+ private long freq;
- HighFrequencyIterator() {
- try {
- Terms terms = MultiFields.getTerms(reader, field);
- if (terms != null) {
- termsEnum = terms.iterator(null);
- } else {
- termsEnum = null;
- }
- minNumDocs = (int)(thresh * (float)reader.numDocs());
- } catch (IOException e) {
- throw new RuntimeException(e);
+ HighFrequencyIterator() throws IOException {
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ termsEnum = terms.iterator(null);
+ } else {
+ termsEnum = null;
}
+ minNumDocs = (int)(thresh * (float)reader.numDocs());
}
private boolean isFrequent(int freq) {
return freq >= minNumDocs;
}
- public float freq() {
- try {
- return termsEnum.docFreq();
- } catch (IOException ioe) {
- throw new RuntimeException(ioe);
- }
+ public long weight() {
+ return freq;
}
-
@Override
public BytesRef next() throws IOException {
if (termsEnum != null) {
BytesRef next;
- while ((next = termsEnum.next()) != null) {
+ while((next = termsEnum.next()) != null) {
if (isFrequent(termsEnum.docFreq())) {
+ freq = termsEnum.docFreq();
spare.copyBytes(next);
return spare;
}
- }
+ }
}
return null;
}
@Override
- public Comparator<BytesRef> comparator() {
- try {
- if (termsEnum == null) {
- return null;
- } else {
- return termsEnum.getComparator();
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
+ public Comparator<BytesRef> getComparator() {
+ if (termsEnum == null) {
+ return null;
+ } else {
+ return termsEnum.getComparator();
}
}
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java Fri Feb 24 09:49:39 2012
@@ -43,17 +43,12 @@ public class LuceneDictionary implements
this.field = field;
}
- public final BytesRefIterator getWordsIterator() {
-
- try {
- final Terms terms = MultiFields.getTerms(reader, field);
- if (terms != null) {
- return terms.iterator(null);
- } else {
- return BytesRefIterator.EMPTY_ITERATOR;
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
+ public final BytesRefIterator getWordsIterator() throws IOException {
+ final Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ return terms.iterator(null);
+ } else {
+ return BytesRefIterator.EMPTY_ITERATOR;
}
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java Fri Feb 24 09:49:39 2012
@@ -18,7 +18,7 @@ package org.apache.lucene.search.spell;
*/
-import java.util.Iterator;
+import java.util.Comparator;
import java.io.*;
import org.apache.lucene.util.BytesRef;
@@ -53,7 +53,7 @@ public class PlainTextDictionary impleme
in = new BufferedReader(reader);
}
- public BytesRefIterator getWordsIterator() {
+ public BytesRefIterator getWordsIterator() throws IOException {
return new FileIterator();
}
@@ -85,6 +85,11 @@ public class PlainTextDictionary impleme
}
return result;
}
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return null;
+ }
}
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java Fri Feb 24 09:49:39 2012
@@ -18,12 +18,14 @@ package org.apache.lucene.search.spell;
*/
import java.io.IOException;
+import java.util.Comparator;
+
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
public interface TermFreqIterator extends BytesRefIterator {
- public float freq();
+ public long weight();
public static class TermFreqIteratorWrapper implements TermFreqIterator {
private BytesRefIterator wrapped;
@@ -32,12 +34,17 @@ public interface TermFreqIterator extend
this.wrapped = wrapped;
}
- public float freq() {
- return 1.0f;
+ public long weight() {
+ return 1;
}
public BytesRef next() throws IOException {
return wrapped.next();
}
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return wrapped.getComparator();
+ }
}
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java Fri Feb 24 09:49:39 2012
@@ -18,7 +18,7 @@ package org.apache.lucene.search.suggest
*/
import java.io.IOException;
-
+import java.util.Comparator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -27,12 +27,14 @@ import org.apache.lucene.util.BytesRef;
* This wrapper buffers incoming elements.
*/
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
-
+ // TODO keep this for now
protected BytesRefList entries = new BytesRefList();
protected int curPos = -1;
- protected float[] freqs = new float[1];
+ protected long[] freqs = new long[1];
private final BytesRef spare = new BytesRef();
+ private final Comparator<BytesRef> comp;
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
+ this.comp = source.getComparator();
BytesRef spare;
int freqIndex = 0;
while((spare = source.next()) != null) {
@@ -40,12 +42,12 @@ public class BufferingTermFreqIteratorWr
if (freqIndex >= freqs.length) {
freqs = ArrayUtil.grow(freqs, freqs.length+1);
}
- freqs[freqIndex++] = source.freq();
+ freqs[freqIndex++] = source.weight();
}
}
- public float freq() {
+ public long weight() {
return freqs[curPos];
}
@@ -58,5 +60,10 @@ public class BufferingTermFreqIteratorWr
return null;
}
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return comp;
+ }
+
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java Fri Feb 24 09:49:39 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest
import java.io.IOException;
import java.util.Comparator;
+
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
@@ -83,6 +84,11 @@ final class BytesRefList {
}
return null;
}
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return null;
+ }
};
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java Fri Feb 24 09:49:39 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest
import java.io.*;
+import java.util.Comparator;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.TermFreqIterator;
@@ -56,11 +57,11 @@ public class FileDictionary implements D
}
final class FileIterator implements TermFreqIterator {
- private float curFreq;
+ private long curFreq;
private final BytesRef spare = new BytesRef();
- public float freq() {
+ public long weight() {
return curFreq;
}
@@ -73,7 +74,8 @@ public class FileDictionary implements D
if (line != null) {
String[] fields = line.split("\t");
if (fields.length > 1) {
- curFreq = Float.parseFloat(fields[1]);
+ // keep reading floats for bw compat
+ curFreq = (int)Float.parseFloat(fields[1]);
spare.copyChars(fields[0]);
} else {
spare.copyChars(line);
@@ -86,6 +88,11 @@ public class FileDictionary implements D
return null;
}
}
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return null;
+ }
}
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java Fri Feb 24 09:49:39 2012
@@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.Dictionary;
@@ -33,10 +34,10 @@ public abstract class Lookup {
* Result of a lookup.
*/
public static final class LookupResult implements Comparable<LookupResult> {
- public final String key;
+ public final CharSequence key;
public final float value;
- public LookupResult(String key, float value) {
+ public LookupResult(CharSequence key, float value) {
this.key = key;
this.value = value;
}
@@ -48,10 +49,32 @@ public abstract class Lookup {
/** Compare alphabetically. */
public int compareTo(LookupResult o) {
- return this.key.compareTo(o.key);
+ return CHARSEQUENCE_COMPARATOR.compare(key, o.key);
}
}
+ public static final Comparator<CharSequence> CHARSEQUENCE_COMPARATOR = new CharSequenceComparator();
+
+ private static class CharSequenceComparator implements Comparator<CharSequence> {
+
+ @Override
+ public int compare(CharSequence o1, CharSequence o2) {
+ final int l1 = o1.length();
+ final int l2 = o2.length();
+
+ final int aStop = Math.min(l1, l2);
+ for (int i = 0; i < aStop; i++) {
+ int diff = o1.charAt(i) - o2.charAt(i);
+ if (diff != 0) {
+ return diff;
+ }
+ }
+ // One is a prefix of the other, or, they are equal:
+ return l1 - l2;
+ }
+
+ }
+
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
public LookupPriorityQueue(int size) {
@@ -99,8 +122,7 @@ public abstract class Lookup {
* @param num maximum number of results to return
* @return a list of possible completions, with their relative weight (e.g. popularity)
*/
- // TODO: this should be a BytesRef API?
- public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
+ public abstract List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num);
/**
* Modify the lookup data by recording additional data. Optional operation.
@@ -109,16 +131,14 @@ public abstract class Lookup {
* @return true if new key is added, false if it already exists or operation
* is not supported.
*/
- // TODO: this should be a BytesRef API?
- public abstract boolean add(String key, Object value);
+ public abstract boolean add(CharSequence key, Object value);
/**
* Get value associated with a specific key.
* @param key lookup key
* @return associated value
*/
- // TODO: this should be a BytesRef API?
- public abstract Object get(String key);
+ public abstract Object get(CharSequence key);
/**
* Persist the constructed lookup data to a directory. Optional operation.
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java Fri Feb 24 09:49:39 2012
@@ -20,7 +20,6 @@ package org.apache.lucene.search.suggest
import java.io.IOException;
import java.util.Comparator;
-import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
@@ -28,13 +27,12 @@ import org.apache.lucene.util.BytesRef;
* This wrapper buffers incoming elements and makes sure they are sorted in
* ascending lexicographic order.
*/
-public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator {
-
+public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
+ // TODO keep this for now - but the consumer should really sort this stuff on disk with sorter...
private final int[] sortedOrds;
private int currentOrd = -1;
private final BytesRef spare = new BytesRef();
private final Comparator<BytesRef> comp;
-
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
super(source);
@@ -43,7 +41,7 @@ public class SortedTermFreqIteratorWrapp
}
@Override
- public float freq() {
+ public long weight() {
return freqs[currentOrd];
}
@@ -56,9 +54,8 @@ public class SortedTermFreqIteratorWrapp
}
@Override
- public Comparator<BytesRef> comparator() {
+ public Comparator<BytesRef> getComparator() {
return comp;
}
-
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java Fri Feb 24 09:49:39 2012
@@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRef;
* random order.
*/
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
-
+ // TODO keep this for now
private final int[] ords;
private int currentOrd = -1;
private final BytesRef spare = new BytesRef();
@@ -48,7 +48,7 @@ public class UnsortedTermFreqIteratorWra
}
@Override
- public float freq() {
+ public long weight() {
return freqs[currentOrd];
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java Fri Feb 24 09:49:39 2012
@@ -199,7 +199,7 @@ public class FSTCompletion {
* @return Returns the suggestions, sorted by their approximated weight first
* (decreasing) and then alphabetically (UTF-8 codepoint order).
*/
- public List<Completion> lookup(String key, int num) {
+ public List<Completion> lookup(CharSequence key, int num) {
if (key.length() == 0 || automaton == null) {
return EMPTY_RESULT;
}
@@ -388,7 +388,7 @@ public class FSTCompletion {
* Returns the bucket assigned to a given key (if found) or <code>null</code> if
* no exact match exists.
*/
- public Integer getBucket(String key) {
+ public Integer getBucket(CharSequence key) {
return getExactMatchStartingFromRootArc(0, new BytesRef(key));
}
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java Fri Feb 24 09:49:39 2012
@@ -46,7 +46,7 @@ import org.apache.lucene.util.fst.*;
* </ul>
*
* <p>
- * At runtime, in {@link FSTCompletion#lookup(String, int)},
+ * At runtime, in {@link FSTCompletion#lookup(CharSequence, int)},
* the automaton is utilized as follows:
* <ul>
* <li>For each possible term weight encoded in the automaton (cached arcs from
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java Fri Feb 24 09:49:39 2012
@@ -41,7 +41,7 @@ import org.apache.lucene.util.fst.NoOutp
* An adapter from {@link Lookup} API to {@link FSTCompletion}.
*
* <p>This adapter differs from {@link FSTCompletion} in that it attempts
- * to discretize any "weights" as passed from in {@link TermFreqIterator#freq()}
+ * to discretize any "weights" as passed in from {@link TermFreqIterator#weight()}
* to match the number of buckets. For the rationale for bucketing, see
* {@link FSTCompletion}.
*
@@ -171,7 +171,7 @@ public class FSTCompletionLookup extends
}
output.reset(buffer);
- output.writeInt(FloatMagic.toSortable(tfit.freq()));
+ output.writeInt(FloatMagic.toSortable(tfit.weight()));
output.writeBytes(spare.bytes, spare.offset, spare.length);
writer.write(buffer, 0, output.getPosition());
}
@@ -232,7 +232,7 @@ public class FSTCompletionLookup extends
}
@Override
- public List<LookupResult> lookup(String key, boolean higherWeightsFirst, int num) {
+ public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
final List<Completion> completions;
if (higherWeightsFirst) {
completions = higherWeightsCompletion.lookup(key, num);
@@ -241,20 +241,23 @@ public class FSTCompletionLookup extends
}
final ArrayList<LookupResult> results = new ArrayList<LookupResult>(completions.size());
+ CharsRef spare = new CharsRef();
for (Completion c : completions) {
- results.add(new LookupResult(c.utf8.utf8ToString(), c.bucket));
+ spare.grow(c.utf8.length);
+ UnicodeUtil.UTF8toUTF16(c.utf8, spare);
+ results.add(new LookupResult(spare.toString(), c.bucket));
}
return results;
}
@Override
- public boolean add(String key, Object value) {
+ public boolean add(CharSequence key, Object value) {
// Not supported.
return false;
}
@Override
- public Float get(String key) {
+ public Object get(CharSequence key) {
Integer bucket = normalCompletion.getBucket(key);
if (bucket == null)
return null;
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java Fri Feb 24 09:49:39 2012
@@ -33,8 +33,10 @@ import org.apache.lucene.store.InputStre
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
@@ -121,7 +123,7 @@ public class WFSTCompletionLookup extend
output.reset(buffer);
output.writeBytes(spare.bytes, spare.offset, spare.length);
output.writeByte((byte)0); // separator: not used, just for sort order
- output.writeInt((int)encodeWeight(iterator.freq()));
+ output.writeInt((int)encodeWeight(iterator.weight()));
writer.write(buffer, 0, output.getPosition());
}
writer.close();
@@ -200,7 +202,7 @@ public class WFSTCompletionLookup extend
}
@Override
- public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
+ public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
assert num > 0;
BytesRef scratch = new BytesRef(key);
int prefixLength = scratch.length;
@@ -217,8 +219,11 @@ public class WFSTCompletionLookup extend
}
List<LookupResult> results = new ArrayList<LookupResult>(num);
+ CharsRef spare = new CharsRef();
if (exactFirst && arc.isFinal()) {
- results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
+ spare.grow(scratch.length);
+ UnicodeUtil.UTF8toUTF16(scratch, spare);
+ results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
if (--num == 0) {
return results; // that was quick
}
@@ -236,8 +241,9 @@ public class WFSTCompletionLookup extend
// append suffix
Util.toBytesRef(completion.input, suffix);
scratch.append(suffix);
-
- results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + completion.output)));
+ spare.grow(scratch.length);
+ UnicodeUtil.UTF8toUTF16(scratch, spare);
+ results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + completion.output)));
}
return results;
}
@@ -264,7 +270,7 @@ public class WFSTCompletionLookup extend
}
@Override
- public boolean add(String key, Object value) {
+ public boolean add(CharSequence key, Object value) {
return false; // Not supported.
}
@@ -273,7 +279,7 @@ public class WFSTCompletionLookup extend
* or null if it does not exist.
*/
@Override
- public Float get(String key) {
+ public Object get(CharSequence key) {
Arc<Long> arc = new Arc<Long>();
Long result = null;
try {
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java Fri Feb 24 09:49:39 2012
@@ -28,7 +28,6 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
@@ -45,7 +44,7 @@ public class JaspellLookup extends Looku
@Override
public void build(TermFreqIterator tfit) throws IOException {
- if (tfit instanceof SortedIterator) {
+ if (tfit.getComparator() != null) {
// make sure it's unsorted
// WTF - this could result in yet another sorted iteration....
tfit = new UnsortedTermFreqIteratorWrapper(tfit);
@@ -56,7 +55,7 @@ public class JaspellLookup extends Looku
final CharsRef charsSpare = new CharsRef();
while ((spare = tfit.next()) != null) {
- float freq = tfit.freq();
+ float freq = tfit.weight();
if (spare.length == 0) {
continue;
}
@@ -67,19 +66,19 @@ public class JaspellLookup extends Looku
}
@Override
- public boolean add(String key, Object value) {
+ public boolean add(CharSequence key, Object value) {
trie.put(key, value);
// XXX
return false;
}
@Override
- public Object get(String key) {
+ public Object get(CharSequence key) {
return trie.get(key);
}
@Override
- public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
+ public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
List<LookupResult> res = new ArrayList<LookupResult>();
List<String> list;
int count = onlyMorePopular ? num * 2 : num;
@@ -97,7 +96,7 @@ public class JaspellLookup extends Looku
LookupPriorityQueue queue = new LookupPriorityQueue(num);
for (String s : list) {
float freq = (Float)trie.get(s);
- queue.insertWithOverflow(new LookupResult(s, freq));
+ queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq));
}
for (LookupResult lr : queue.getResults()) {
res.add(lr);
@@ -106,7 +105,7 @@ public class JaspellLookup extends Looku
for (int i = 0; i < maxCnt; i++) {
String s = list.get(i);
float freq = (Float)trie.get(s);
- res.add(new LookupResult(s, freq));
+ res.add(new LookupResult(new CharsRef(s), freq));
}
}
return res;
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java Fri Feb 24 09:49:39 2012
@@ -368,8 +368,8 @@ public class JaspellTernarySearchTrie {
* A <code>String</code> index.
*@return The object retrieved from the Ternary Search Trie.
*/
- public Object get(String key) {
- TSTNode node = getNode(key.trim().toLowerCase());
+ public Object get(CharSequence key) {
+ TSTNode node = getNode(key);
if (node == null) {
return null;
}
@@ -435,7 +435,7 @@ public class JaspellTernarySearchTrie {
*@return The node object indexed by key. This object is an instance of an
* inner class named <code>TernarySearchTrie.TSTNode</code>.
*/
- public TSTNode getNode(String key) {
+ public TSTNode getNode(CharSequence key) {
return getNode(key, rootNode);
}
@@ -443,15 +443,14 @@ public class JaspellTernarySearchTrie {
* Returns the node indexed by key, or <code>null</code> if that node doesn't
* exist. The search begins at root node.
*
- *@param key2
+ *@param key
* A <code>String</code> that indexes the node that is returned.
*@param startNode
* The top node defining the subtrie to be searched.
*@return The node object indexed by key. This object is an instance of an
* inner class named <code>TernarySearchTrie.TSTNode</code>.
*/
- protected TSTNode getNode(String key2, TSTNode startNode) {
- String key = key2.trim().toLowerCase();
+ protected TSTNode getNode(CharSequence key, TSTNode startNode) {
if (key == null || startNode == null || key.length() == 0) {
return null;
}
@@ -490,7 +489,7 @@ public class JaspellTernarySearchTrie {
*@exception IllegalArgumentException
* If the key is an empty <code>String</code>.
*/
- protected TSTNode getOrCreateNode(String key) throws NullPointerException,
+ protected TSTNode getOrCreateNode(CharSequence key) throws NullPointerException,
IllegalArgumentException {
if (key == null) {
throw new NullPointerException(
@@ -568,7 +567,7 @@ public class JaspellTernarySearchTrie {
* The maximum number of values returned by this method.
*@return A <code>List</code> with the results
*/
- public List<String> matchAlmost(String key, int numReturnValues) {
+ public List<String> matchAlmost(CharSequence key, int numReturnValues) {
return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key,
((numReturnValues < 0) ? -1 : numReturnValues), new Vector<String>(), false);
}
@@ -598,7 +597,7 @@ public class JaspellTernarySearchTrie {
*@return A <code>List</code> with the results.
*/
private List<String> matchAlmostRecursion(TSTNode currentNode, int charIndex,
- int d, String matchAlmostKey, int matchAlmostNumReturnValues,
+ int d, CharSequence matchAlmostKey, int matchAlmostNumReturnValues,
List<String> matchAlmostResult2, boolean upTo) {
if ((currentNode == null)
|| (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues)
@@ -658,7 +657,7 @@ public class JaspellTernarySearchTrie {
* The maximum number of values returned from this method.
*@return A <code>List</code> with the results
*/
- public List<String> matchPrefix(String prefix, int numReturnValues) {
+ public List<String> matchPrefix(CharSequence prefix, int numReturnValues) {
Vector<String> sortKeysResult = new Vector<String>();
TSTNode startNode = getNode(prefix);
if (startNode == null) {
@@ -722,8 +721,8 @@ public class JaspellTernarySearchTrie {
*@param value
* The object to be stored in the Trie.
*/
- public void put(String key, Object value) {
- getOrCreateNode(key.trim().toLowerCase()).data = value;
+ public void put(CharSequence key, Object value) {
+ getOrCreateNode(key).data = value;
}
/**
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java Fri Feb 24 09:49:39 2012
@@ -57,7 +57,7 @@ public class TSTAutocomplete {
* index of character in key to be inserted currently.
* @return currentNode The new reference to root node of TST
*/
- public TernaryTreeNode insert(TernaryTreeNode currentNode, String s,
+ public TernaryTreeNode insert(TernaryTreeNode currentNode, CharSequence s,
Object val, int x) {
if (s == null || s.length() <= x) {
return currentNode;
@@ -69,7 +69,7 @@ public class TSTAutocomplete {
if (x < s.length() - 1) {
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
} else {
- currentNode.token = s;
+ currentNode.token = s.toString();
currentNode.val = val;
return currentNode;
}
@@ -79,7 +79,7 @@ public class TSTAutocomplete {
if (x < s.length() - 1) {
currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1);
} else {
- currentNode.token = s;
+ currentNode.token = s.toString();
currentNode.val = val;
return currentNode;
}
@@ -104,7 +104,7 @@ public class TSTAutocomplete {
* @return suggest list of auto-completed keys for the given prefix query.
*/
public ArrayList<TernaryTreeNode> prefixCompletion(TernaryTreeNode root,
- String s, int x) {
+ CharSequence s, int x) {
TernaryTreeNode p = root;
ArrayList<TernaryTreeNode> suggest = new ArrayList<TernaryTreeNode>();
Modified: lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (original)
+++ lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java Fri Feb 24 09:49:39 2012
@@ -30,7 +30,6 @@ import java.util.List;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
-import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
@@ -45,7 +44,7 @@ public class TSTLookup extends Lookup {
public void build(TermFreqIterator tfit) throws IOException {
root = new TernaryTreeNode();
// buffer first
- if ((!(tfit instanceof SortedIterator)) || ((SortedIterator)tfit).comparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
+ if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
// make sure it's sorted and the comparator uses UTF16 sort order
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
}
@@ -58,34 +57,47 @@ public class TSTLookup extends Lookup {
charsSpare.grow(spare.length);
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
tokens.add(charsSpare.toString());
- vals.add(new Float(tfit.freq()));
+ vals.add(new Float(tfit.weight()));
}
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
}
@Override
- public boolean add(String key, Object value) {
+ public boolean add(CharSequence key, Object value) {
autocomplete.insert(root, key, value, 0);
// XXX we don't know if a new node was created
return true;
}
@Override
- public Object get(String key) {
+ public Object get(CharSequence key) {
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
if (list == null || list.isEmpty()) {
return null;
}
for (TernaryTreeNode n : list) {
- if (n.token.equals(key)) {
+ if (charSeqEquals(n.token, key)) {
return n.val;
}
}
return null;
}
+
+ private static boolean charSeqEquals(CharSequence left, CharSequence right) {
+ int len = left.length();
+ if (len != right.length()) {
+ return false;
+ }
+ for (int i = 0; i < len; i++) {
+ if (left.charAt(i) != right.charAt(i)) {
+ return false;
+ }
+ }
+ return true;
+ }
@Override
- public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
+ public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
List<LookupResult> res = new ArrayList<LookupResult>();
if (list == null || list.size() == 0) {
Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java Fri Feb 24 09:49:39 2012
@@ -97,7 +97,7 @@ public class LookupBenchmarkTest extends
while ((line = br.readLine()) != null) {
int tab = line.indexOf('|');
assertTrue("No | separator?: " + line, tab >= 0);
- float weight = Float.parseFloat(line.substring(tab + 1));
+ int weight = Integer.parseInt(line.substring(tab + 1));
String key = line.substring(0, tab);
input.add(new TermFreq(key, weight));
}
Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java Fri Feb 24 09:49:39 2012
@@ -23,6 +23,7 @@ import org.apache.lucene.search.suggest.
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.lucene.search.suggest.tst.TSTLookup;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
public class PersistenceTest extends LuceneTestCase {
public final String[] keys = new String[] {
@@ -61,7 +62,7 @@ public class PersistenceTest extends Luc
Lookup lookup = lookupClass.newInstance();
TermFreq[] keys = new TermFreq[this.keys.length];
for (int i = 0; i < keys.length; i++)
- keys[i] = new TermFreq(this.keys[i], (float) i);
+ keys[i] = new TermFreq(this.keys[i], i);
lookup.build(new TermFreqArrayIterator(keys));
// Store the suggester.
@@ -75,7 +76,7 @@ public class PersistenceTest extends Luc
// Assert validity.
float previous = Float.NEGATIVE_INFINITY;
for (TermFreq k : keys) {
- Float val = (Float) lookup.get(k.term.utf8ToString());
+ Float val = (Float) lookup.get(_TestUtil.bytesToCharSequence(k.term, random));
assertNotNull(k.term.utf8ToString(), val);
if (supportsExactWeights) {
Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java Fri Feb 24 09:49:39 2012
@@ -21,13 +21,13 @@ import org.apache.lucene.util.BytesRef;
public final class TermFreq {
public final BytesRef term;
- public final float v;
+ public final long v;
- public TermFreq(String term, float v) {
+ public TermFreq(String term, long v) {
this(new BytesRef(term), v);
}
- public TermFreq(BytesRef term, float v) {
+ public TermFreq(BytesRef term, long v) {
this.term = term;
this.v = v;
}
Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java Fri Feb 24 09:49:39 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest
import java.io.IOException;
import java.util.Arrays;
+import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.search.spell.TermFreqIterator;
@@ -44,7 +45,7 @@ public final class TermFreqArrayIterator
this(i.iterator());
}
- public float freq() {
+ public long weight() {
return current.v;
}
@@ -57,4 +58,9 @@ public final class TermFreqArrayIterator
}
return null;
}
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return null;
+ }
}
\ No newline at end of file
Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java Fri Feb 24 09:49:39 2012
@@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+
+import org.apache.lucene.search.suggest.BytesRefList;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.LuceneTestCase;
Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java Fri Feb 24 09:49:39 2012
@@ -23,7 +23,6 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.HighFrequencyDictionary;
-import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.LuceneTestCase;
@@ -37,8 +36,7 @@ public class TestHighFrequencyDictionary
IndexReader ir = DirectoryReader.open(dir);
Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
BytesRefIterator tf = dictionary.getWordsIterator();
- assertTrue(tf instanceof SortedIterator);
- ((SortedIterator)tf).comparator();
+ assertNull(tf.getComparator());
assertNull(tf.next());
dir.close();
}
Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java Fri Feb 24 09:49:39 2012
@@ -38,7 +38,7 @@ public class TestTermFreqIterator extend
public void testTerms() throws Exception {
int num = atLeast(10000);
- TreeMap<BytesRef,Float> sorted = new TreeMap<BytesRef,Float>();
+ TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>();
TermFreq[] unsorted = new TermFreq[num];
for (int i = 0; i < num; i++) {
@@ -46,28 +46,28 @@ public class TestTermFreqIterator extend
do {
key = new BytesRef(_TestUtil.randomUnicodeString(random));
} while (sorted.containsKey(key));
- float value = random.nextFloat();
+ long value = random.nextLong();
sorted.put(key, value);
unsorted[i] = new TermFreq(key, value);
}
// test the sorted iterator wrapper
TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator());
- Iterator<Map.Entry<BytesRef,Float>> expected = sorted.entrySet().iterator();
+ Iterator<Map.Entry<BytesRef,Long>> expected = sorted.entrySet().iterator();
while (expected.hasNext()) {
- Map.Entry<BytesRef,Float> entry = expected.next();
+ Map.Entry<BytesRef,Long> entry = expected.next();
assertEquals(entry.getKey(), wrapper.next());
- assertEquals(entry.getValue().floatValue(), wrapper.freq(), 0F);
+ assertEquals(entry.getValue().longValue(), wrapper.weight(), 0F);
}
assertNull(wrapper.next());
// test the unsorted iterator wrapper
wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted));
- TreeMap<BytesRef,Float> actual = new TreeMap<BytesRef,Float>();
+ TreeMap<BytesRef,Long> actual = new TreeMap<BytesRef,Long>();
BytesRef key;
while ((key = wrapper.next()) != null) {
- float value = wrapper.freq();
+ long value = wrapper.weight();
actual.put(BytesRef.deepCopyOf(key), value);
}
assertEquals(sorted, actual);
Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java Fri Feb 24 09:49:39 2012
@@ -28,7 +28,7 @@ import org.apache.lucene.util.*;
* Unit tests for {@link FSTCompletion}.
*/
public class FSTCompletionTest extends LuceneTestCase {
- public static TermFreq tf(String t, float v) {
+ public static TermFreq tf(String t, int v) {
return new TermFreq(t, v);
}
@@ -62,28 +62,28 @@ public class FSTCompletionTest extends L
tf("foundation", 1),
tf("fourblah", 1),
tf("fourteen", 1),
- tf("four", 0f),
- tf("fourier", 0f),
- tf("fourty", 0f),
+ tf("four", 0),
+ tf("fourier", 0),
+ tf("fourty", 0),
tf("xo", 1),
};
return keys;
}
public void testExactMatchHighPriority() throws Exception {
- assertMatchEquals(completion.lookup("two", 1),
+ assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("two", random), 1),
"two/1.0");
}
public void testExactMatchLowPriority() throws Exception {
- assertMatchEquals(completion.lookup("one", 2),
+ assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0",
"oneness/1.0");
}
public void testExactMatchReordering() throws Exception {
// Check reordering of exact matches.
- assertMatchEquals(completion.lookup("four", 4),
+ assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4),
"four/0.0",
"fourblah/1.0",
"fourteen/1.0",
@@ -92,49 +92,49 @@ public class FSTCompletionTest extends L
public void testRequestedCount() throws Exception {
// 'one' is promoted after collecting two higher ranking results.
- assertMatchEquals(completion.lookup("one", 2),
+ assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0",
"oneness/1.0");
// 'four' is collected in a bucket and then again as an exact match.
- assertMatchEquals(completion.lookup("four", 2),
+ assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 2),
"four/0.0",
"fourblah/1.0");
// Check reordering of exact matches.
- assertMatchEquals(completion.lookup("four", 4),
+ assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4),
"four/0.0",
"fourblah/1.0",
"fourteen/1.0",
"fourier/0.0");
// 'one' is at the top after collecting all alphabetical results.
- assertMatchEquals(completionAlphabetical.lookup("one", 2),
+ assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0",
"oneness/1.0");
// 'one' is not promoted after collecting two higher ranking results.
FSTCompletion noPromotion = new FSTCompletion(completion.getFST(), true, false);
- assertMatchEquals(noPromotion.lookup("one", 2),
+ assertMatchEquals(noPromotion.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"oneness/1.0",
"onerous/1.0");
// 'one' is at the top after collecting all alphabetical results.
- assertMatchEquals(completionAlphabetical.lookup("one", 2),
+ assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2),
"one/0.0",
"oneness/1.0");
}
public void testMiss() throws Exception {
- assertMatchEquals(completion.lookup("xyz", 1));
+ assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("xyz", random), 1));
}
public void testAlphabeticWithWeights() throws Exception {
- assertEquals(0, completionAlphabetical.lookup("xyz", 1).size());
+ assertEquals(0, completionAlphabetical.lookup(_TestUtil.stringToCharSequence("xyz", random), 1).size());
}
public void testFullMatchList() throws Exception {
- assertMatchEquals(completion.lookup("one", Integer.MAX_VALUE),
+ assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), Integer.MAX_VALUE),
"oneness/1.0",
"onerous/1.0",
"onesimus/1.0",
@@ -148,7 +148,7 @@ public class FSTCompletionTest extends L
builder.add(new BytesRef(key), 0);
FSTCompletion lookup = builder.build();
- List<Completion> result = lookup.lookup(key, 1);
+ List<Completion> result = lookup.lookup(_TestUtil.stringToCharSequence(key, random), 1);
assertEquals(1, result.size());
}
@@ -158,7 +158,7 @@ public class FSTCompletionTest extends L
Random r = random;
List<TermFreq> keys = new ArrayList<TermFreq>();
for (int i = 0; i < 5000; i++) {
- keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1.0f));
+ keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1));
}
lookup.build(new TermFreqArrayIterator(keys));
@@ -167,7 +167,7 @@ public class FSTCompletionTest extends L
// are.
Float previous = null;
for (TermFreq tf : keys) {
- Float current = lookup.get(tf.term.utf8ToString());
+ Float current = (Float)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random));
if (previous != null) {
assertEquals(previous, current);
}
@@ -180,28 +180,27 @@ public class FSTCompletionTest extends L
FSTCompletionLookup lookup = new FSTCompletionLookup();
lookup.build(new TermFreqArrayIterator(input));
-
for (TermFreq tf : input) {
- assertTrue("Not found: " + tf.term, lookup.get(tf.term.utf8ToString()) != null);
- assertEquals(tf.term.utf8ToString(), lookup.lookup(tf.term.utf8ToString(), true, 1).get(0).key);
+ assertTrue("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)) != null);
+ assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random), true, 1).get(0).key.toString());
}
- List<LookupResult> result = lookup.lookup("wit", true, 5);
+ List<LookupResult> result = lookup.lookup(_TestUtil.stringToCharSequence("wit", random), true, 5);
assertEquals(5, result.size());
- assertTrue(result.get(0).key.equals("wit")); // exact match.
- assertTrue(result.get(1).key.equals("with")); // highest count.
+ assertTrue(result.get(0).key.toString().equals("wit")); // exact match.
+ assertTrue(result.get(1).key.toString().equals("with")); // highest count.
}
public void testEmptyInput() throws Exception {
completion = new FSTCompletionBuilder().build();
- assertMatchEquals(completion.lookup("", 10));
+ assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("", random), 10));
}
public void testRandom() throws Exception {
List<TermFreq> freqs = new ArrayList<TermFreq>();
Random rnd = random;
for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
- float weight = rnd.nextFloat() * 100;
+ int weight = random.nextInt(100);
freqs.add(new TermFreq("" + rnd.nextLong(), weight));
}
@@ -212,8 +211,8 @@ public class FSTCompletionTest extends L
final String term = tf.term.utf8ToString();
for (int i = 1; i < term.length(); i++) {
String prefix = term.substring(0, i);
- for (LookupResult lr : lookup.lookup(prefix, true, 10)) {
- assertTrue(lr.key.startsWith(prefix));
+ for (LookupResult lr : lookup.lookup(_TestUtil.stringToCharSequence(prefix, random), true, 10)) {
+ assertTrue(lr.key.toString().startsWith(prefix));
}
}
}
Modified: lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java (original)
+++ lucene/dev/trunk/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java Fri Feb 24 09:49:39 2012
@@ -45,33 +45,33 @@ public class WFSTCompletionTest extends
suggester.build(new TermFreqArrayIterator(keys));
// top N of 2, but only foo is available
- List<LookupResult> results = suggester.lookup("f", false, 2);
+ List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
assertEquals(1, results.size());
- assertEquals("foo", results.get(0).key);
+ assertEquals("foo", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
// top N of 1 for 'bar': we return this even though barbar is higher
- results = suggester.lookup("bar", false, 1);
+ results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random), false, 1);
assertEquals(1, results.size());
- assertEquals("bar", results.get(0).key);
+ assertEquals("bar", results.get(0).key.toString());
assertEquals(10, results.get(0).value, 0.01F);
// top N Of 2 for 'b'
- results = suggester.lookup("b", false, 2);
+ results = suggester.lookup(_TestUtil.stringToCharSequence("b", random), false, 2);
assertEquals(2, results.size());
- assertEquals("barbar", results.get(0).key);
+ assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
- assertEquals("bar", results.get(1).key);
+ assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
// top N of 3 for 'ba'
- results = suggester.lookup("ba", false, 3);
+ results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random), false, 3);
assertEquals(3, results.size());
- assertEquals("barbar", results.get(0).key);
+ assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
- assertEquals("bar", results.get(1).key);
+ assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
- assertEquals("barbara", results.get(2).key);
+ assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F);
}
@@ -100,7 +100,7 @@ public class WFSTCompletionTest extends
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random.nextInt(1<<24);
slowCompletor.put(s, (long)weight);
- keys[i] = new TermFreq(s, (float) weight);
+ keys[i] = new TermFreq(s, weight);
}
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
@@ -109,7 +109,7 @@ public class WFSTCompletionTest extends
for (String prefix : allPrefixes) {
final int topN = _TestUtil.nextInt(random, 1, 10);
- List<LookupResult> r = suggester.lookup(prefix, false, topN);
+ List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence(prefix, random), false, topN);
// 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
final List<LookupResult> matches = new ArrayList<LookupResult>();
@@ -126,7 +126,7 @@ public class WFSTCompletionTest extends
public int compare(LookupResult left, LookupResult right) {
int cmp = Float.compare(right.value, left.value);
if (cmp == 0) {
- return left.key.compareTo(right.key);
+ return left.compareTo(right);
} else {
return cmp;
}
@@ -140,7 +140,7 @@ public class WFSTCompletionTest extends
for(int hit=0;hit<r.size();hit++) {
//System.out.println(" check hit " + hit);
- assertEquals(matches.get(hit).key, r.get(hit).key);
+ assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
}
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java?rev=1293148&r1=1293147&r2=1293148&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java Fri Feb 24 09:49:39 2012
@@ -31,6 +31,7 @@ import org.apache.lucene.search.spell.Hi
import org.apache.lucene.search.suggest.FileDictionary;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
+import org.apache.lucene.util.CharsRef;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
@@ -152,7 +153,7 @@ public class Suggester extends SolrSpell
build(core, searcher);
}
- public void add(String query, int numHits) {
+ public void add(CharsRef query, int numHits) {
LOG.info("add " + query + ", " + numHits);
lookup.add(query, new Integer(numHits));
}
@@ -167,9 +168,12 @@ public class Suggester extends SolrSpell
return EMPTY_RESULT;
}
SpellingResult res = new SpellingResult();
+ CharsRef scratch = new CharsRef();
for (Token t : options.tokens) {
- String term = new String(t.buffer(), 0, t.length());
- List<LookupResult> suggestions = lookup.lookup(term,
+ scratch.chars = t.buffer();
+ scratch.offset = 0;
+ scratch.length = t.length();
+ List<LookupResult> suggestions = lookup.lookup(scratch,
options.onlyMorePopular, options.count);
if (suggestions == null) {
continue;
@@ -178,7 +182,7 @@ public class Suggester extends SolrSpell
Collections.sort(suggestions);
}
for (LookupResult lr : suggestions) {
- res.add(t, lr.key, ((Number)lr.value).intValue());
+ res.add(t, lr.key.toString(), ((Number)lr.value).intValue());
}
}
return res;