You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/10/13 12:26:23 UTC
svn commit: r1531664 [1/2] - in /lucene/dev/trunk/lucene: ./
suggest/src/java/org/apache/lucene/search/spell/
suggest/src/java/org/apache/lucene/search/suggest/
suggest/src/java/org/apache/lucene/search/suggest/analyzing/
suggest/src/java/org/apache/lu...
Author: mikemccand
Date: Sun Oct 13 10:26:22 2013
New Revision: 1531664
URL: http://svn.apache.org/r1531664
Log:
LUCENE-5260: cutover all suggesters to TermFreqPayloadIterator
Added:
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java (with props)
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java (with props)
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java (with props)
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java (with props)
Removed:
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sun Oct 13 10:26:22 2013
@@ -150,6 +150,10 @@ API Changes:
numBits parameter to allow growing/shrinking the copied bitset. You can
use FixedBitSet.clone() if you only need to clone the bitset. (Shai Erera)
+* LUCENE-5260: Use TermFreqPayloadIterator for all suggesters; those
+ suggesters that can't support payloads will throw an exception if
+ hasPayloads() is true. (Areek Zillur via Mike McCandless)
+
Optimizations
* LUCENE-5225: The ToParentBlockJoinQuery only keeps tracks of the the child
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java Sun Oct 13 10:26:22 2013
@@ -59,7 +59,7 @@ public class HighFrequencyDictionary imp
return new HighFrequencyIterator();
}
- final class HighFrequencyIterator implements TermFreqIterator {
+ final class HighFrequencyIterator implements TermFreqPayloadIterator {
private final BytesRef spare = new BytesRef();
private final TermsEnum termsEnum;
private int minNumDocs;
@@ -98,5 +98,15 @@ public class HighFrequencyDictionary imp
}
return null;
}
+
+ @Override
+ public BytesRef payload() {
+ return null;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return false;
+ }
}
}
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java Sun Oct 13 10:26:22 2013
@@ -17,20 +17,67 @@ package org.apache.lucene.search.spell;
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
+import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
/**
* Interface for enumerating term,weight,payload triples;
- * currently only {@link AnalyzingSuggester} and {@link
- * FuzzySuggester} support payloads.
+ * currently only {@link AnalyzingSuggester}, {@link
+ * FuzzySuggester} and {@link AnalyzingInfixSuggester} support payloads.
*/
-public interface TermFreqPayloadIterator extends TermFreqIterator {
+public interface TermFreqPayloadIterator extends BytesRefIterator {
+ /** A term's weight, higher numbers mean better suggestions. */
+ public long weight();
+
/** An arbitrary byte[] to record per suggestion. See
* {@link LookupResult#payload} to retrieve the payload
* for each suggestion. */
public BytesRef payload();
+
+ /** Returns true if the iterator has payloads */
+ public boolean hasPayloads();
+
+ /**
+ * Wraps a BytesRefIterator as a TermFreqPayloadIterator, with all weights
+ * set to <code>1</code> and no payloads.
+ */
+ public static class TermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
+ private final BytesRefIterator wrapped;
+
+ /**
+ * Creates a new wrapper, wrapping the specified iterator and
+ * specifying a weight value of <code>1</code> for all terms
+ * and a null payload for each of them.
+ */
+ public TermFreqPayloadIteratorWrapper(BytesRefIterator wrapped) {
+ this.wrapped = wrapped;
+ }
+
+ @Override
+ public long weight() {
+ return 1;
+ }
+
+ @Override
+ public BytesRef next() throws IOException {
+ return wrapped.next();
+ }
+
+ @Override
+ public BytesRef payload() {
+ return null;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return false;
+ }
+ }
}
Added: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java?rev=1531664&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java (added)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java Sun Oct 13 10:26:22 2013
@@ -0,0 +1,89 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Counter;
+
+/**
+ * This wrapper buffers incoming elements.
+ * @lucene.experimental
+ */
+public class BufferingTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
+ // TODO keep this for now
+ /** buffered term entries */
+ protected BytesRefArray entries = new BytesRefArray(Counter.newCounter());
+ /** buffered payload entries */
+ protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter());
+ /** current buffer position */
+ protected int curPos = -1;
+ /** buffered weights, parallel with {@link #entries} */
+ protected long[] freqs = new long[1];
+ private final BytesRef spare = new BytesRef();
+ private final BytesRef payloadSpare = new BytesRef();
+ private final boolean hasPayloads;
+
+ /** Creates a new iterator, buffering entries from the specified iterator */
+ public BufferingTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
+ BytesRef spare;
+ int freqIndex = 0;
+ hasPayloads = source.hasPayloads();
+ while((spare = source.next()) != null) {
+ entries.append(spare);
+ if (hasPayloads) {
+ payloads.append(source.payload());
+ }
+ if (freqIndex >= freqs.length) {
+ freqs = ArrayUtil.grow(freqs, freqs.length+1);
+ }
+ freqs[freqIndex++] = source.weight();
+ }
+
+ }
+
+ @Override
+ public long weight() {
+ return freqs[curPos];
+ }
+
+ @Override
+ public BytesRef next() throws IOException {
+ if (++curPos < entries.size()) {
+ entries.get(spare, curPos);
+ return spare;
+ }
+ return null;
+ }
+
+ @Override
+ public BytesRef payload() {
+ if (hasPayloads && curPos < payloads.size()) {
+ return payloads.get(payloadSpare, curPos);
+ }
+ return null;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
+}
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java Sun Oct 13 10:26:22 2013
@@ -47,12 +47,6 @@ import org.apache.lucene.util.BytesRefIt
* The term, weight and (optionally) payload fields supplied
* are required for ALL documents and has to be stored
* </li>
- * <li>
- * This Dictionary implementation is not compatible with the following Suggesters:
- * {@link JaspellLookup}, {@link TSTLookup}, {@link FSTCompletionLookup},
- * {@link WFSTCompletionLookup} and {@link AnalyzingInfixSuggester}.
- * see https://issues.apache.org/jira/browse/LUCENE-5260
- * </li>
* </ul>
*/
public class DocumentDictionary implements Dictionary {
@@ -95,7 +89,7 @@ public class DocumentDictionary implemen
final class TermWeightPayloadIterator implements TermFreqPayloadIterator {
private final int docCount;
private final Set<String> relevantFields;
- private final boolean withPayload;
+ private final boolean hasPayloads;
private final Bits liveDocs;
private int currentDocId = -1;
private long currentWeight;
@@ -106,13 +100,13 @@ public class DocumentDictionary implemen
* index. setting <code>withPayload</code> to false, implies an iterator
* over only term and weight.
*/
- public TermWeightPayloadIterator(boolean withPayload) throws IOException {
+ public TermWeightPayloadIterator(boolean hasPayloads) throws IOException {
docCount = reader.maxDoc() - 1;
- this.withPayload = withPayload;
+ this.hasPayloads = hasPayloads;
currentPayload = null;
liveDocs = MultiFields.getLiveDocs(reader);
List<String> relevantFieldList;
- if(withPayload) {
+ if(hasPayloads) {
relevantFieldList = Arrays.asList(field, weightField, payloadField);
} else {
relevantFieldList = Arrays.asList(field, weightField);
@@ -135,7 +129,7 @@ public class DocumentDictionary implemen
StoredDocument doc = reader.document(currentDocId, relevantFields);
- if (withPayload) {
+ if (hasPayloads) {
StorableField payload = doc.getField(payloadField);
if (payload == null) {
throw new IllegalArgumentException(payloadField + " does not exist");
@@ -169,6 +163,11 @@ public class DocumentDictionary implemen
public BytesRef payload() {
return currentPayload;
}
+
+ @Override
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
}
}
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java Sun Oct 13 10:26:22 2013
@@ -21,7 +21,7 @@ package org.apache.lucene.search.suggest
import java.io.*;
import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -57,11 +57,11 @@ public class FileDictionary implements D
}
@Override
- public TermFreqIterator getWordsIterator() {
+ public TermFreqPayloadIterator getWordsIterator() {
return new FileIterator();
}
- final class FileIterator implements TermFreqIterator {
+ final class FileIterator implements TermFreqPayloadIterator {
private long curFreq;
private final BytesRef spare = new BytesRef();
@@ -98,5 +98,15 @@ public class FileDictionary implements D
return null;
}
}
+
+ @Override
+ public BytesRef payload() {
+ return null;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return false;
+ }
}
}
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java Sun Oct 13 10:26:22 2013
@@ -24,7 +24,7 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;
@@ -154,25 +154,25 @@ public abstract class Lookup {
/** Build lookup from a dictionary. Some implementations may require sorted
* or unsorted keys from the dictionary's iterator - use
- * {@link SortedTermFreqIteratorWrapper} or
- * {@link UnsortedTermFreqIteratorWrapper} in such case.
+ * {@link SortedTermFreqPayloadIteratorWrapper} or
+ * {@link UnsortedTermFreqPayloadIteratorWrapper} in such case.
*/
public void build(Dictionary dict) throws IOException {
BytesRefIterator it = dict.getWordsIterator();
- TermFreqIterator tfit;
- if (it instanceof TermFreqIterator) {
- tfit = (TermFreqIterator)it;
+ TermFreqPayloadIterator tfit;
+ if (it instanceof TermFreqPayloadIterator) {
+ tfit = (TermFreqPayloadIterator)it;
} else {
- tfit = new TermFreqIterator.TermFreqIteratorWrapper(it);
+ tfit = new TermFreqPayloadIterator.TermFreqPayloadIteratorWrapper(it);
}
build(tfit);
}
/**
- * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}.
+ * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqPayloadIterator}.
* The implementation might re-sort the data internally.
*/
- public abstract void build(TermFreqIterator tfit) throws IOException;
+ public abstract void build(TermFreqPayloadIterator tfit) throws IOException;
/**
* Look up a key and return possible completion for this key.
Added: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java?rev=1531664&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java (added)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java Sun Oct 13 10:26:22 2013
@@ -0,0 +1,227 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.search.suggest.Sort.ByteSequencesReader;
+import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * This wrapper buffers incoming elements and makes sure they are sorted based on given comparator.
+ * @lucene.experimental
+ */
+public class SortedTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
+
+ private final TermFreqPayloadIterator source;
+ private File tempInput;
+ private File tempSorted;
+ private final ByteSequencesReader reader;
+ private final Comparator<BytesRef> comparator;
+ private final boolean hasPayloads;
+ private boolean done = false;
+
+ private long weight;
+ private final BytesRef scratch = new BytesRef();
+ private BytesRef payload = new BytesRef();
+
+ /**
+ * Creates a new sorted wrapper, using {@link
+ * BytesRef#getUTF8SortedAsUnicodeComparator} for
+ * sorting. */
+ public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
+ this(source, BytesRef.getUTF8SortedAsUnicodeComparator());
+ }
+
+ /**
+ * Creates a new sorted wrapper, sorting by BytesRef
+ * (ascending) then cost (ascending).
+ */
+ public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source, Comparator<BytesRef> comparator) throws IOException {
+ this.hasPayloads = source.hasPayloads();
+ this.source = source;
+ this.comparator = comparator;
+ this.reader = sort();
+ }
+
+ @Override
+ public BytesRef next() throws IOException {
+ boolean success = false;
+ if (done) {
+ return null;
+ }
+ try {
+ ByteArrayDataInput input = new ByteArrayDataInput();
+ if (reader.read(scratch)) {
+ weight = decode(scratch, input);
+ if (hasPayloads) {
+ payload = decodePayload(scratch, input);
+ }
+ success = true;
+ return scratch;
+ }
+ close();
+ success = done = true;
+ return null;
+ } finally {
+ if (!success) {
+ done = true;
+ close();
+ }
+ }
+ }
+
+ @Override
+ public long weight() {
+ return weight;
+ }
+
+ @Override
+ public BytesRef payload() {
+ if (hasPayloads) {
+ return payload;
+ }
+ return null;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
+
+ /** Sorts by BytesRef (ascending) then cost (ascending). */
+ private final Comparator<BytesRef> tieBreakByCostComparator = new Comparator<BytesRef>() {
+
+ private final BytesRef leftScratch = new BytesRef();
+ private final BytesRef rightScratch = new BytesRef();
+ private final ByteArrayDataInput input = new ByteArrayDataInput();
+
+ @Override
+ public int compare(BytesRef left, BytesRef right) {
+ // Make shallow copy in case decode changes the BytesRef:
+ leftScratch.bytes = left.bytes;
+ leftScratch.offset = left.offset;
+ leftScratch.length = left.length;
+ rightScratch.bytes = right.bytes;
+ rightScratch.offset = right.offset;
+ rightScratch.length = right.length;
+ long leftCost = decode(leftScratch, input);
+ long rightCost = decode(rightScratch, input);
+ if (hasPayloads) {
+ decodePayload(leftScratch, input);
+ decodePayload(rightScratch, input);
+ }
+ int cmp = comparator.compare(leftScratch, rightScratch);
+ if (cmp != 0) {
+ return cmp;
+ }
+ return Long.compare(leftCost, rightCost);
+ }
+ };
+
+ private Sort.ByteSequencesReader sort() throws IOException {
+ String prefix = getClass().getSimpleName();
+ File directory = Sort.defaultTempDir();
+ tempInput = File.createTempFile(prefix, ".input", directory);
+ tempSorted = File.createTempFile(prefix, ".sorted", directory);
+
+ final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
+ boolean success = false;
+ try {
+ BytesRef spare;
+ byte[] buffer = new byte[0];
+ ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
+
+ while ((spare = source.next()) != null) {
+ encode(writer, output, buffer, spare, source.payload(), source.weight());
+ }
+ writer.close();
+ new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted);
+ ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted);
+ success = true;
+ return reader;
+
+ } finally {
+ if (success) {
+ IOUtils.close(writer);
+ } else {
+ try {
+ IOUtils.closeWhileHandlingException(writer);
+ } finally {
+ close();
+ }
+ }
+ }
+ }
+
+ private void close() throws IOException {
+ IOUtils.close(reader);
+ if (tempInput != null) {
+ tempInput.delete();
+ }
+ if (tempSorted != null) {
+ tempSorted.delete();
+ }
+ }
+
+ /** encodes an entry (bytes+(payload)+weight) to the provided writer */
+ protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
+ int requiredLength = spare.length + 8 + ((hasPayloads) ? 2 + payload.length : 0);
+ if (requiredLength >= buffer.length) {
+ buffer = ArrayUtil.grow(buffer, requiredLength);
+ }
+ output.reset(buffer);
+ output.writeBytes(spare.bytes, spare.offset, spare.length);
+ if (hasPayloads) {
+ output.writeBytes(payload.bytes, payload.offset, payload.length);
+ output.writeShort((short) payload.length);
+ }
+ output.writeLong(weight);
+ writer.write(buffer, 0, output.getPosition());
+ }
+
+ /** decodes the weight at the current position */
+ protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
+ tmpInput.reset(scratch.bytes);
+ tmpInput.skipBytes(scratch.length - 8); // suggestion
+ scratch.length -= 8; // long
+ return tmpInput.readLong();
+ }
+
+ /** decodes the payload at the current position */
+ protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
+ tmpInput.reset(scratch.bytes);
+ tmpInput.skipBytes(scratch.length - 2); // skip to payload size
+ short payloadLength = tmpInput.readShort(); // read payload size
+ tmpInput.setPosition(scratch.length - 2 - payloadLength); // setPosition to start of payload
+ BytesRef payloadScratch = new BytesRef(payloadLength);
+ tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload
+ payloadScratch.length = payloadLength;
+ scratch.length -= 2; // payload length info (short)
+ scratch.length -= payloadLength; // payload
+ return payloadScratch;
+ }
+}
Added: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java?rev=1531664&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java (added)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java Sun Oct 13 10:26:22 2013
@@ -0,0 +1,79 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This wrapper buffers the incoming elements and makes sure they are in
+ * random order.
+ * @lucene.experimental
+ */
+public class UnsortedTermFreqPayloadIteratorWrapper extends BufferingTermFreqPayloadIteratorWrapper {
+ // TODO keep this for now
+ private final int[] ords;
+ private int currentOrd = -1;
+ private final BytesRef spare = new BytesRef();
+ private final BytesRef payloadSpare = new BytesRef();
+ /**
+ * Creates a new iterator, wrapping the specified iterator and
+ * returning elements in a random order.
+ */
+ public UnsortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
+ super(source);
+ ords = new int[entries.size()];
+ Random random = new Random();
+ for (int i = 0; i < ords.length; i++) {
+ ords[i] = i;
+ }
+ for (int i = 0; i < ords.length; i++) {
+ int randomPosition = random.nextInt(ords.length);
+ int temp = ords[i];
+ ords[i] = ords[randomPosition];
+ ords[randomPosition] = temp;
+ }
+ }
+
+ @Override
+ public long weight() {
+ assert currentOrd == ords[curPos];
+ return freqs[currentOrd];
+ }
+
+ @Override
+ public BytesRef next() throws IOException {
+ if (++curPos < entries.size()) {
+ currentOrd = ords[curPos];
+ return entries.get(spare, currentOrd);
+ }
+ return null;
+ }
+
+ @Override
+ public BytesRef payload() {
+ if (hasPayloads() && curPos < payloads.size()) {
+ assert currentOrd == ords[curPos];
+ return payloads.get(payloadSpare, currentOrd);
+ }
+ return null;
+ }
+}
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Sun Oct 13 10:26:22 2013
@@ -65,7 +65,6 @@ import org.apache.lucene.search.ScoreDoc
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
import org.apache.lucene.search.suggest.Lookup;
@@ -176,19 +175,14 @@ public class AnalyzingInfixSuggester ext
}
@Override
- public void build(TermFreqIterator iter) throws IOException {
+ public void build(TermFreqPayloadIterator iter) throws IOException {
if (searcher != null) {
searcher.getIndexReader().close();
searcher = null;
}
- TermFreqPayloadIterator payloads;
- if (iter instanceof TermFreqPayloadIterator) {
- payloads = (TermFreqPayloadIterator) iter;
- } else {
- payloads = null;
- }
+
Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));
IndexWriter w = null;
@@ -236,7 +230,7 @@ public class AnalyzingInfixSuggester ext
doc.add(weightField);
Field payloadField;
- if (payloads != null) {
+ if (iter.hasPayloads()) {
payloadField = new BinaryDocValuesField("payloads", new BytesRef());
doc.add(payloadField);
} else {
@@ -250,8 +244,8 @@ public class AnalyzingInfixSuggester ext
textGramField.setStringValue(textString);
textDVField.setBytesValue(text);
weightField.setLongValue(iter.weight());
- if (payloads != null) {
- payloadField.setBytesValue(payloads.payload());
+ if (iter.hasPayloads()) {
+ payloadField.setBytesValue(iter.payload());
}
w.addDocument(doc);
}
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Sun Oct 13 10:26:22 2013
@@ -31,7 +31,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
-import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort;
@@ -381,19 +380,13 @@ public class AnalyzingSuggester extends
}
@Override
- public void build(TermFreqIterator iterator) throws IOException {
+ public void build(TermFreqPayloadIterator iterator) throws IOException {
String prefix = getClass().getSimpleName();
File directory = Sort.defaultTempDir();
File tempInput = File.createTempFile(prefix, ".input", directory);
File tempSorted = File.createTempFile(prefix, ".sorted", directory);
- TermFreqPayloadIterator payloads;
- if (iterator instanceof TermFreqPayloadIterator) {
- payloads = (TermFreqPayloadIterator) iterator;
- } else {
- payloads = null;
- }
- hasPayloads = payloads != null;
+ hasPayloads = iterator.hasPayloads();
Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
Sort.ByteSequencesReader reader = null;
@@ -432,7 +425,7 @@ public class AnalyzingSuggester extends
if (surfaceForm.length > (Short.MAX_VALUE-2)) {
throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")");
}
- payload = payloads.payload();
+ payload = iterator.payload();
// payload + surfaceLength (short)
requiredLength += payload.length + 2;
} else {
@@ -470,7 +463,7 @@ public class AnalyzingSuggester extends
writer.close();
// Sort all input/output pairs (required by FST.Builder):
- new Sort(new AnalyzingComparator(payloads != null)).sort(tempInput, tempSorted);
+ new Sort(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);
// Free disk space:
tempInput.delete();
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java Sun Oct 13 10:26:22 2013
@@ -54,7 +54,6 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort;
@@ -274,15 +273,15 @@ public class FreeTextSuggester extends L
}
@Override
- public void build(TermFreqIterator iterator) throws IOException {
+ public void build(TermFreqPayloadIterator iterator) throws IOException {
build(iterator, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
}
/** Build the suggest index, using up to the specified
* amount of temporary RAM while building. Note that
* the weights for the suggestions are ignored. */
- public void build(TermFreqIterator iterator, double ramBufferSizeMB) throws IOException {
- if (iterator instanceof TermFreqPayloadIterator) {
+ public void build(TermFreqPayloadIterator iterator, double ramBufferSizeMB) throws IOException {
+ if (iterator.hasPayloads()) {
throw new IllegalArgumentException("payloads are not supported");
}
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java Sun Oct 13 10:26:22 2013
@@ -24,7 +24,6 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort.SortInfo;
@@ -43,7 +42,7 @@ import org.apache.lucene.util.fst.NoOutp
* An adapter from {@link Lookup} API to {@link FSTCompletion}.
*
* <p>This adapter differs from {@link FSTCompletion} in that it attempts
- * to discretize any "weights" as passed from in {@link TermFreqIterator#weight()}
+ * to discretize any "weights" as passed from in {@link TermFreqPayloadIterator#weight()}
* to match the number of buckets. For the rationale for bucketing, see
* {@link FSTCompletion}.
*
@@ -96,7 +95,7 @@ public class FSTCompletionLookup extends
/**
* This constructor prepares for creating a suggested FST using the
- * {@link #build(TermFreqIterator)} method. The number of weight
+ * {@link #build(TermFreqPayloadIterator)} method. The number of weight
* discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and
* exact matches are promoted to the top of the suggestions list.
*/
@@ -106,7 +105,7 @@ public class FSTCompletionLookup extends
/**
* This constructor prepares for creating a suggested FST using the
- * {@link #build(TermFreqIterator)} method.
+ * {@link #build(TermFreqPayloadIterator)} method.
*
* @param buckets
* The number of weight discretization buckets (see
@@ -141,8 +140,8 @@ public class FSTCompletionLookup extends
}
@Override
- public void build(TermFreqIterator tfit) throws IOException {
- if (tfit instanceof TermFreqPayloadIterator) {
+ public void build(TermFreqPayloadIterator tfit) throws IOException {
+ if (tfit.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
File tempInput = File.createTempFile(
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java Sun Oct 13 10:26:22 2013
@@ -25,11 +25,10 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
-import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
+import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.InputStreamDataInput;
@@ -93,12 +92,12 @@ public class WFSTCompletionLookup extend
}
@Override
- public void build(TermFreqIterator iterator) throws IOException {
- if (iterator instanceof TermFreqPayloadIterator) {
+ public void build(TermFreqPayloadIterator iterator) throws IOException {
+ if (iterator.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
BytesRef scratch = new BytesRef();
- TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
+ TermFreqPayloadIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
IntsRef scratchInts = new IntsRef();
BytesRef previous = null;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@@ -255,14 +254,15 @@ public class WFSTCompletionLookup extend
return Integer.MAX_VALUE - (int)value;
}
- private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqIteratorWrapper {
+ private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqPayloadIteratorWrapper {
- WFSTTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
+ WFSTTermFreqIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
super(source);
+ assert source.hasPayloads() == false;
}
@Override
- protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
+ protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
if (spare.length + 4 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 4);
}
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java Sun Oct 13 10:26:22 2013
@@ -25,7 +25,6 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
@@ -47,13 +46,13 @@ public class JaspellLookup extends Looku
/**
* Creates a new empty trie
- * @see #build(TermFreqIterator)
+ * @see #build(TermFreqPayloadIterator)
* */
public JaspellLookup() {}
@Override
- public void build(TermFreqIterator tfit) throws IOException {
- if (tfit instanceof TermFreqPayloadIterator) {
+ public void build(TermFreqPayloadIterator tfit) throws IOException {
+ if (tfit.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
trie = new JaspellTernarySearchTrie();
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java Sun Oct 13 10:26:22 2013
@@ -25,10 +25,9 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
+import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
@@ -46,19 +45,19 @@ public class TSTLookup extends Lookup {
/**
* Creates a new TSTLookup with an empty Ternary Search Tree.
- * @see #build(TermFreqIterator)
+ * @see #build(TermFreqPayloadIterator)
*/
public TSTLookup() {}
@Override
- public void build(TermFreqIterator tfit) throws IOException {
- if (tfit instanceof TermFreqPayloadIterator) {
+ public void build(TermFreqPayloadIterator tfit) throws IOException {
+ if (tfit.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
root = new TernaryTreeNode();
// make sure it's sorted and the comparator uses UTF16 sort order
- tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
+ tfit = new SortedTermFreqPayloadIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
ArrayList<String> tokens = new ArrayList<String>();
ArrayList<Number> vals = new ArrayList<Number>();
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java Sun Oct 13 10:26:22 2013
@@ -72,12 +72,12 @@ public class LookupBenchmarkTest extends
/**
* Input term/weight pairs.
*/
- private static TermFreq [] dictionaryInput;
+ private static TermFreqPayload [] dictionaryInput;
/**
* Benchmark term/weight pairs (randomized order).
*/
- private static List<TermFreq> benchmarkInput;
+ private static List<TermFreqPayload> benchmarkInput;
/**
* Loads terms and frequencies from Wikipedia (cached).
@@ -85,9 +85,9 @@ public class LookupBenchmarkTest extends
@BeforeClass
public static void setup() throws Exception {
assert false : "disable assertions before running benchmarks!";
- List<TermFreq> input = readTop50KWiki();
+ List<TermFreqPayload> input = readTop50KWiki();
Collections.shuffle(input, random);
- LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreq [input.size()]);
+ LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreqPayload [input.size()]);
Collections.shuffle(input, random);
LookupBenchmarkTest.benchmarkInput = input;
}
@@ -97,8 +97,8 @@ public class LookupBenchmarkTest extends
/**
* Collect the multilingual input for benchmarks/ tests.
*/
- public static List<TermFreq> readTop50KWiki() throws Exception {
- List<TermFreq> input = new ArrayList<TermFreq>();
+ public static List<TermFreqPayload> readTop50KWiki() throws Exception {
+ List<TermFreqPayload> input = new ArrayList<TermFreqPayload>();
URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8");
assert resource != null : "Resource missing: Top50KWiki.utf8";
@@ -109,7 +109,7 @@ public class LookupBenchmarkTest extends
assertTrue("No | separator?: " + line, tab >= 0);
int weight = Integer.parseInt(line.substring(tab + 1));
String key = line.substring(0, tab);
- input.add(new TermFreq(key, weight));
+ input.add(new TermFreqPayload(key, weight));
}
br.close();
return input;
@@ -163,7 +163,7 @@ public class LookupBenchmarkTest extends
/**
* Create {@link Lookup} instance and populate it.
*/
- private Lookup buildLookup(Class<? extends Lookup> cls, TermFreq[] input) throws Exception {
+ private Lookup buildLookup(Class<? extends Lookup> cls, TermFreqPayload[] input) throws Exception {
Lookup lookup = null;
try {
lookup = cls.newInstance();
@@ -176,7 +176,7 @@ public class LookupBenchmarkTest extends
lookup = ctor.newInstance(a);
}
}
- lookup.build(new TermFreqArrayIterator(input));
+ lookup.build(new TermFreqPayloadArrayIterator(input));
return lookup;
}
@@ -220,7 +220,7 @@ public class LookupBenchmarkTest extends
final Lookup lookup = buildLookup(cls, dictionaryInput);
final List<String> input = new ArrayList<String>(benchmarkInput.size());
- for (TermFreq tf : benchmarkInput) {
+ for (TermFreqPayload tf : benchmarkInput) {
String s = tf.term.utf8ToString();
String sub = s.substring(0, Math.min(s.length(),
minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1)));
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java Sun Oct 13 10:26:22 2013
@@ -65,10 +65,10 @@ public class PersistenceTest extends Luc
// Add all input keys.
Lookup lookup = lookupClass.newInstance();
- TermFreq[] keys = new TermFreq[this.keys.length];
+ TermFreqPayload[] keys = new TermFreqPayload[this.keys.length];
for (int i = 0; i < keys.length; i++)
- keys[i] = new TermFreq(this.keys[i], i);
- lookup.build(new TermFreqArrayIterator(keys));
+ keys[i] = new TermFreqPayload(this.keys[i], i);
+ lookup.build(new TermFreqPayloadArrayIterator(keys));
// Store the suggester.
File storeDir = TEMP_DIR;
@@ -81,7 +81,7 @@ public class PersistenceTest extends Luc
// Assert validity.
Random random = random();
long previous = Long.MIN_VALUE;
- for (TermFreq k : keys) {
+ for (TermFreqPayload k : keys) {
List<LookupResult> list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1);
assertEquals(1, list.size());
LookupResult lookupResult = list.get(0);
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java Sun Oct 13 10:26:22 2013
@@ -23,14 +23,32 @@ public final class TermFreqPayload {
public final BytesRef term;
public final long v;
public final BytesRef payload;
+ public final boolean hasPayloads;
+ public TermFreqPayload(BytesRef term, long v, BytesRef payload) {
+ this(term, v, payload, true);
+ }
+
public TermFreqPayload(String term, long v, BytesRef payload) {
- this(new BytesRef(term), v, payload);
+ this(new BytesRef(term), v, payload, true);
}
- public TermFreqPayload(BytesRef term, long v, BytesRef payload) {
+ public TermFreqPayload(BytesRef term, long v) {
+ this(term, v, null, false);
+ }
+
+ public TermFreqPayload(String term, long v) {
+ this(new BytesRef(term), v, null, false);
+ }
+
+ public TermFreqPayload(BytesRef term, long v, BytesRef payload, boolean hasPayloads) {
this.term = term;
this.v = v;
this.payload = payload;
+ this.hasPayloads = hasPayloads;
+ }
+
+ public boolean hasPayloads() {
+ return hasPayloads;
}
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java Sun Oct 13 10:26:22 2013
@@ -20,26 +20,33 @@ package org.apache.lucene.search.suggest
import java.util.Arrays;
import java.util.Iterator;
-import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.BytesRef;
/**
- * A {@link TermFreqIterator} over a sequence of {@link TermFreq}s.
+ * A {@link TermFreqPayloadIterator} over a sequence of {@link TermFreqPayload}s.
*/
public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterator {
private final Iterator<TermFreqPayload> i;
+ private final boolean hasPayloads;
+ private boolean first;
private TermFreqPayload current;
private final BytesRef spare = new BytesRef();
public TermFreqPayloadArrayIterator(Iterator<TermFreqPayload> i) {
this.i = i;
+ if (i.hasNext()) {
+ current = i.next();
+ first = true;
+ this.hasPayloads = current.hasPayloads;
+ } else {
+ this.hasPayloads = false;
+ }
}
public TermFreqPayloadArrayIterator(TermFreqPayload[] i) {
this(Arrays.asList(i));
}
-
public TermFreqPayloadArrayIterator(Iterable<TermFreqPayload> i) {
this(i.iterator());
}
@@ -51,8 +58,12 @@ public final class TermFreqPayloadArrayI
@Override
public BytesRef next() {
- if (i.hasNext()) {
- current = i.next();
+ if (i.hasNext() || (first && current!=null)) {
+ if (first) {
+ first = false;
+ } else {
+ current = i.next();
+ }
spare.copyBytes(current.term);
return spare;
}
@@ -63,4 +74,9 @@ public final class TermFreqPayloadArrayI
public BytesRef payload() {
return current.payload;
}
+
+ @Override
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
}
\ No newline at end of file
Added: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java?rev=1531664&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java (added)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java Sun Oct 13 10:26:22 2013
@@ -0,0 +1,124 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import java.util.AbstractMap.SimpleEntry;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+import java.util.TreeMap;
+
+import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestTermFreqPayloadIterator extends LuceneTestCase {
+
+ public void testEmpty() throws Exception { // both wrappers over an empty input must return null from the very first next()
+ TermFreqPayloadArrayIterator iterator = new TermFreqPayloadArrayIterator(new TermFreqPayload[0]); // zero-length input array
+ TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
+ assertNull(wrapper.next()); // sorted wrapper yields nothing
+ wrapper = new UnsortedTermFreqPayloadIteratorWrapper(iterator); // the same source iterator instance is reused here
+ assertNull(wrapper.next()); // unsorted wrapper yields nothing
+ }
+
+ public void testTerms() throws Exception { // checks the sorted and unsorted wrappers against TreeMap references, with and without payloads
+ Random random = random();
+ int num = atLeast(10000);
+
+ Comparator<BytesRef> comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator(); // one of the two BytesRef orderings, picked at random
+ TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> sorted = new TreeMap<>(comparator); // reference data: term -> (weight, payload)
+ TreeMap<BytesRef, Long> sortedWithoutPayload = new TreeMap<>(comparator); // reference data: term -> weight
+ TermFreqPayload[] unsorted = new TermFreqPayload[num];
+ TermFreqPayload[] unsortedWithoutPayload = new TermFreqPayload[num];
+
+ for (int i = 0; i < num; i++) {
+ BytesRef key;
+ BytesRef payload;
+ do {
+ key = new BytesRef(_TestUtil.randomUnicodeString(random));
+ payload = new BytesRef(_TestUtil.randomUnicodeString(random));
+ } while (sorted.containsKey(key)); // regenerate until the term is not already in the reference map
+ long value = random.nextLong();
+ sortedWithoutPayload.put(key, value);
+ sorted.put(key, new SimpleEntry<>(value, payload));
+ unsorted[i] = new TermFreqPayload(key, value, payload);
+ unsortedWithoutPayload[i] = new TermFreqPayload(key, value); // same term and weight, built without a payload
+ }
+
+ // test the sorted iterator wrapper with payloads
+ TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted), comparator);
+ Iterator<Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>>> expected = sorted.entrySet().iterator();
+ while (expected.hasNext()) { // walk the reference map in comparator order, in lock-step with the wrapper
+ Map.Entry<BytesRef,SimpleEntry<Long, BytesRef>> entry = expected.next();
+
+ assertEquals(entry.getKey(), wrapper.next());
+ assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
+ assertEquals(entry.getValue().getValue(), wrapper.payload());
+ }
+ assertNull(wrapper.next()); // wrapper must be exhausted once every reference entry has been seen
+
+ // test the unsorted iterator wrapper with payloads
+ wrapper = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted));
+ TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>(); // no comparator passed: keys use their natural ordering
+ BytesRef key;
+ while ((key = wrapper.next()) != null) {
+ long value = wrapper.weight();
+ BytesRef payload = wrapper.payload();
+ actual.put(BytesRef.deepCopyOf(key), new SimpleEntry<>(value, BytesRef.deepCopyOf(payload))); // deep copies; presumably the wrapper may reuse the returned BytesRef instances - TODO confirm
+ }
+ assertEquals(sorted, actual); // compared as maps, so the order in which the wrapper returned terms is not checked
+
+ // test the sorted iterator wrapper without payloads
+ TermFreqPayloadIterator wrapperWithoutPayload = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload), comparator);
+ Iterator<Map.Entry<BytesRef, Long>> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator();
+ while (expectedWithoutPayload.hasNext()) {
+ Map.Entry<BytesRef, Long> entry = expectedWithoutPayload.next();
+
+ assertEquals(entry.getKey(), wrapperWithoutPayload.next());
+ assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight());
+ assertNull(wrapperWithoutPayload.payload()); // payload() is expected to be null for payload-less input
+ }
+ assertNull(wrapperWithoutPayload.next());
+
+ // test the unsorted iterator wrapper without payloads
+ wrapperWithoutPayload = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload));
+ TreeMap<BytesRef, Long> actualWithoutPayload = new TreeMap<>();
+ while ((key = wrapperWithoutPayload.next()) != null) {
+ long value = wrapperWithoutPayload.weight();
+ assertNull(wrapperWithoutPayload.payload());
+ actualWithoutPayload.put(BytesRef.deepCopyOf(key), value);
+ }
+ assertEquals(sortedWithoutPayload, actualWithoutPayload);
+ }
+
+ public static long asLong(BytesRef b) { // combines the 8 bytes starting at b.offset into one long, most significant byte first
+ return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b, // first 4 bytes form the high 32 bits
+ b.offset + 4) & 0xFFFFFFFFL); // & binds tighter than |, so the mask strips sign extension from the low int only
+ }
+
+ private static int asIntInternal(BytesRef b, int pos) { // assembles b.bytes[pos..pos+3] into an int, most significant byte first; pos is an absolute index into b.bytes
+ return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16) // & 0xFF treats each byte as unsigned before it is shifted
+ | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
+ }
+}
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1531664&r1=1531663&r2=1531664&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Sun Oct 13 10:26:22 2013
@@ -52,8 +52,6 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.search.suggest.TermFreq;
-import org.apache.lucene.search.suggest.TermFreqArrayIterator;
import org.apache.lucene.search.suggest.TermFreqPayload;
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.util.BytesRef;
@@ -65,18 +63,18 @@ public class AnalyzingSuggesterTest exte
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
public void testKeyword() throws Exception {
- Iterable<TermFreq> keys = shuffle(
- new TermFreq("foo", 50),
- new TermFreq("bar", 10),
- new TermFreq("barbar", 10),
- new TermFreq("barbar", 12),
- new TermFreq("barbara", 6),
- new TermFreq("bar", 5),
- new TermFreq("barbara", 1)
+ Iterable<TermFreqPayload> keys = shuffle(
+ new TermFreqPayload("foo", 50),
+ new TermFreqPayload("bar", 10),
+ new TermFreqPayload("barbar", 10),
+ new TermFreqPayload("barbar", 12),
+ new TermFreqPayload("barbara", 6),
+ new TermFreqPayload("bar", 5),
+ new TermFreqPayload("barbara", 1)
);
AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new TermFreqPayloadArrayIterator(keys));
// top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2);
@@ -165,14 +163,14 @@ public class AnalyzingSuggesterTest exte
public void testRandomRealisticKeys() throws IOException {
LineFileDocs lineFile = new LineFileDocs(random());
Map<String, Long> mapping = new HashMap<>();
- List<TermFreq> keys = new ArrayList<>();
+ List<TermFreqPayload> keys = new ArrayList<>();
int howMany = atLeast(100); // this might bring up duplicates
for (int i = 0; i < howMany; i++) {
Document nextDoc = lineFile.nextDoc();
String title = nextDoc.getField("title").stringValue();
int randomWeight = random().nextInt(100);
- keys.add(new TermFreq(title, randomWeight));
+ keys.add(new TermFreqPayload(title, randomWeight));
if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) {
mapping.put(title, Long.valueOf(randomWeight));
}
@@ -183,15 +181,15 @@ public class AnalyzingSuggesterTest exte
boolean doPayloads = random().nextBoolean();
if (doPayloads) {
List<TermFreqPayload> keysAndPayloads = new ArrayList<>();
- for (TermFreq termFreq : keys) {
+ for (TermFreqPayload termFreq : keys) {
keysAndPayloads.add(new TermFreqPayload(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
}
analyzingSuggester.build(new TermFreqPayloadArrayIterator(keysAndPayloads));
} else {
- analyzingSuggester.build(new TermFreqArrayIterator(keys));
+ analyzingSuggester.build(new TermFreqPayloadArrayIterator(keys));
}
- for (TermFreq termFreq : keys) {
+ for (TermFreqPayload termFreq : keys) {
List<LookupResult> lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size());
for (LookupResult lookupResult : lookup) {
assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value));
@@ -211,14 +209,14 @@ public class AnalyzingSuggesterTest exte
* basic "standardanalyzer" test with stopword removal
*/
public void testStandard() throws Exception {
- TermFreq keys[] = new TermFreq[] {
- new TermFreq("the ghost of christmas past", 50),
+ TermFreqPayload keys[] = new TermFreqPayload[] {
+ new TermFreqPayload("the ghost of christmas past", 50),
};
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
suggester.setPreservePositionIncrements(false);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
assertEquals(1, results.size());
@@ -241,23 +239,23 @@ public class AnalyzingSuggesterTest exte
public void testEmpty() throws Exception {
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
- suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
}
public void testNoSeps() throws Exception {
- TermFreq[] keys = new TermFreq[] {
- new TermFreq("ab cd", 0),
- new TermFreq("abcd", 1),
+ TermFreqPayload[] keys = new TermFreqPayload[] {
+ new TermFreqPayload("ab cd", 0),
+ new TermFreqPayload("abcd", 1),
};
int options = 0;
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new TermFreqPayloadArrayIterator(keys));
// TODO: would be nice if "ab " would allow the test to
// pass, and more generally if the analyzer can know
// that the user's current query has ended at a word,
@@ -318,13 +316,13 @@ public class AnalyzingSuggesterTest exte
}
};
- TermFreq keys[] = new TermFreq[] {
- new TermFreq("wifi network is slow", 50),
- new TermFreq("wi fi network is fast", 10),
+ TermFreqPayload keys[] = new TermFreqPayload[] {
+ new TermFreqPayload("wifi network is slow", 50),
+ new TermFreqPayload("wi fi network is fast", 10),
};
//AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
if (VERBOSE) {
System.out.println("Results: " + results);
@@ -384,12 +382,12 @@ public class AnalyzingSuggesterTest exte
}
};
- TermFreq keys[] = new TermFreq[] {
- new TermFreq("ab xc", 50),
- new TermFreq("ba xd", 50),
+ TermFreqPayload keys[] = new TermFreqPayload[] {
+ new TermFreqPayload("ab xc", 50),
+ new TermFreqPayload("ba xd", 50),
};
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new TermFreqPayloadArrayIterator(keys));
List<LookupResult> results = suggester.lookup("ab x", false, 1);
assertTrue(results.size() == 1);
}
@@ -462,11 +460,11 @@ public class AnalyzingSuggesterTest exte
Analyzer a = getUnusualAnalyzer();
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("x y", 1),
- new TermFreq("x y z", 3),
- new TermFreq("x", 2),
- new TermFreq("z z z", 20),
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
+ new TermFreqPayload("x y", 1),
+ new TermFreqPayload("x y z", 3),
+ new TermFreqPayload("x", 2),
+ new TermFreqPayload("z z z", 20),
}));
//System.out.println("ALL: " + suggester.lookup("x y", false, 6));
@@ -502,11 +500,11 @@ public class AnalyzingSuggesterTest exte
Analyzer a = getUnusualAnalyzer();
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("x y", 1),
- new TermFreq("x y z", 3),
- new TermFreq("x", 2),
- new TermFreq("z z z", 20),
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
+ new TermFreqPayload("x y", 1),
+ new TermFreqPayload("x y z", 3),
+ new TermFreqPayload("x", 2),
+ new TermFreqPayload("z z z", 20),
}));
for(int topN=1;topN<6;topN++) {
@@ -657,12 +655,12 @@ public class AnalyzingSuggesterTest exte
boolean doPayloads = random().nextBoolean();
- TermFreq[] keys = null;
+ TermFreqPayload[] keys = null;
TermFreqPayload[] payloadKeys = null;
if (doPayloads) {
payloadKeys = new TermFreqPayload[numQueries];
} else {
- keys = new TermFreq[numQueries];
+ keys = new TermFreqPayload[numQueries];
}
boolean preserveSep = random().nextBoolean();
@@ -735,7 +733,7 @@ public class AnalyzingSuggesterTest exte
payload = new BytesRef(bytes);
payloadKeys[i] = new TermFreqPayload(key, weight, payload);
} else {
- keys[i] = new TermFreq(key, weight);
+ keys[i] = new TermFreqPayload(key, weight);
payload = null;
}
@@ -758,7 +756,7 @@ public class AnalyzingSuggesterTest exte
if (doPayloads) {
suggester.build(new TermFreqPayloadArrayIterator(shuffle(payloadKeys)));
} else {
- suggester.build(new TermFreqArrayIterator(shuffle(keys)));
+ suggester.build(new TermFreqPayloadArrayIterator(shuffle(keys)));
}
for (String prefix : allPrefixes) {
@@ -876,8 +874,8 @@ public class AnalyzingSuggesterTest exte
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1);
- suggester.build(new TermFreqArrayIterator(shuffle(new TermFreq("a", 40),
- new TermFreq("a ", 50), new TermFreq(" a", 60))));
+ suggester.build(new TermFreqPayloadArrayIterator(shuffle(new TermFreqPayload("a", 40),
+ new TermFreqPayload("a ", 50), new TermFreqPayload(" a", 60))));
List<LookupResult> results = suggester.lookup("a", false, 5);
assertEquals(2, results.size());
@@ -891,11 +889,11 @@ public class AnalyzingSuggesterTest exte
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("a", 2),
- new TermFreq("a b c", 3),
- new TermFreq("a c a", 1),
- new TermFreq("a c b", 1),
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
+ new TermFreqPayload("a", 2),
+ new TermFreqPayload("a b c", 3),
+ new TermFreqPayload("a c a", 1),
+ new TermFreqPayload("a c b", 1),
}));
suggester.lookup("a", false, 4);
@@ -907,10 +905,10 @@ public class AnalyzingSuggesterTest exte
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("a", 5),
- new TermFreq("a b", 3),
- new TermFreq("a c", 4),
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
+ new TermFreqPayload("a", 5),
+ new TermFreqPayload("a b", 3),
+ new TermFreqPayload("a c", 4),
}));
List<LookupResult> results = suggester.lookup("a", false, 3);
@@ -972,9 +970,9 @@ public class AnalyzingSuggesterTest exte
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
- suggester.build(new TermFreqArrayIterator(shuffle(
- new TermFreq("hambone", 6),
- new TermFreq("nellie", 5))));
+ suggester.build(new TermFreqPayloadArrayIterator(shuffle(
+ new TermFreqPayload("hambone", 6),
+ new TermFreqPayload("nellie", 5))));
List<LookupResult> results = suggester.lookup("nellie", false, 2);
assertEquals(2, results.size());
@@ -1041,9 +1039,9 @@ public class AnalyzingSuggesterTest exte
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("a", 6),
- new TermFreq("b", 5),
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
+ new TermFreqPayload("a", 6),
+ new TermFreqPayload("b", 5),
}));
List<LookupResult> results = suggester.lookup("a", false, 2);
@@ -1114,21 +1112,21 @@ public class AnalyzingSuggesterTest exte
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("a a", 50),
- new TermFreq("a b", 50),
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
+ new TermFreqPayload("a a", 50),
+ new TermFreqPayload("a b", 50),
}));
}
public void testDupSurfaceFormsMissingResults3() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("a a", 7),
- new TermFreq("a a", 7),
- new TermFreq("a c", 6),
- new TermFreq("a c", 3),
- new TermFreq("a b", 5),
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
+ new TermFreqPayload("a a", 7),
+ new TermFreqPayload("a a", 7),
+ new TermFreqPayload("a c", 6),
+ new TermFreqPayload("a c", 3),
+ new TermFreqPayload("a b", 5),
}));
assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString());
}
@@ -1136,9 +1134,9 @@ public class AnalyzingSuggesterTest exte
public void testEndingSpace() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("i love lucy", 7),
- new TermFreq("isla de muerta", 8),
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
+ new TermFreqPayload("i love lucy", 7),
+ new TermFreqPayload("isla de muerta", 8),
}));
assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString());
assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString());
@@ -1169,15 +1167,15 @@ public class AnalyzingSuggesterTest exte
};
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, 1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {new TermFreq("a", 1)}));
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {new TermFreqPayload("a", 1)}));
assertEquals("[a/1]", suggester.lookup("a", false, 1).toString());
}
public void testIllegalLookupArgument() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("а где Люси?", 7),
+ suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
+ new TermFreqPayload("а где Люси?", 7),
}));
try {
suggester.lookup("а\u001E", false, 3);