You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ry...@apache.org on 2012/03/07 00:17:23 UTC
svn commit: r1297785 [9/16] - in
/lucene/dev/branches/lucene3795_lsp_spatial_module: ./ dev-tools/eclipse/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/contrib/demo/
dev-tools/maven/lucene/contrib/highlighter/ dev-tools/maven/lucene/...
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java Tue Mar 6 23:17:08 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.queries;
import org.apache.lucene.index.*;
import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -81,7 +82,7 @@ public class TermsFilter extends Filter
br.copyBytes(term.bytes());
if (termsEnum.seekCeil(br) == TermsEnum.SeekStatus.FOUND) {
docs = termsEnum.docs(acceptDocs, docs, false);
- while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
result.set(docs.docID());
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java Tue Mar 6 23:17:08 2012
@@ -69,7 +69,7 @@ public class BoostedQuery extends Query
public BoostedWeight(IndexSearcher searcher) throws IOException {
this.searcher = searcher;
this.qWeight = q.createWeight(searcher);
- this.fcontext = boostVal.newContext(searcher);
+ this.fcontext = ValueSource.newContext(searcher);
boostVal.createWeight(fcontext,searcher);
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java Tue Mar 6 23:17:08 2012
@@ -67,7 +67,7 @@ public class FunctionQuery extends Query
public FunctionWeight(IndexSearcher searcher) throws IOException {
this.searcher = searcher;
- this.context = func.newContext(searcher);
+ this.context = ValueSource.newContext(searcher);
func.createWeight(context, searcher);
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/ValueSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/ValueSource.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/ValueSource.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queries/src/java/org/apache/lucene/queries/function/ValueSource.java Tue Mar 6 23:17:08 2012
@@ -120,7 +120,7 @@ public abstract class ValueSource implem
}
@Override
- public FieldComparator newComparator(String fieldname, int numHits,
+ public FieldComparator<Double> newComparator(String fieldname, int numHits,
int sortPos, boolean reversed) throws IOException {
return new ValueSourceComparator(context, numHits);
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java Tue Mar 6 23:17:08 2012
@@ -1187,7 +1187,7 @@ public abstract class QueryParserBase {
// These characters are part of the query syntax and must be escaped
if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
|| c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
- || c == '*' || c == '?' || c == '|' || c == '&') {
+ || c == '*' || c == '?' || c == '|' || c == '&' || c == '/') {
sb.append('\\');
}
sb.append(c);
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/java/org/apache/lucene/queryparser/ext/Extensions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/java/org/apache/lucene/queryparser/ext/Extensions.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/java/org/apache/lucene/queryparser/ext/Extensions.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/java/org/apache/lucene/queryparser/ext/Extensions.java Tue Mar 6 23:17:08 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.queryparser.ex
* limitations under the License.
*/
import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.queryparser.classic.QueryParserBase;
import java.util.HashMap;
import java.util.Map;
@@ -139,7 +140,7 @@ public class Extensions {
* a backslash character.
*/
public String escapeExtensionField(String extfield) {
- return QueryParser.escape(extfield);
+ return QueryParserBase.escape(extfield);
}
/**
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java Tue Mar 6 23:17:08 2012
@@ -88,7 +88,7 @@ public class TestMultiAnalyzer extends B
qp.setPhraseSlop(0);
// non-default operator:
- qp.setDefaultOperator(QueryParser.AND_OPERATOR);
+ qp.setDefaultOperator(QueryParserBase.AND_OPERATOR);
assertEquals("+(multi multi2) +foo", qp.parse("multi foo").toString());
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java Tue Mar 6 23:17:08 2012
@@ -117,7 +117,7 @@ public class TestMultiFieldQueryParser e
assertEquals("(b:one t:one) f:two", q.toString());
// AND mode:
- mfqp.setDefaultOperator(QueryParser.AND_OPERATOR);
+ mfqp.setDefaultOperator(QueryParserBase.AND_OPERATOR);
q = mfqp.parse("one two");
assertEquals("+(b:one t:one) +(b:two t:two)", q.toString());
q = mfqp.parse("\"aa bb cc\" \"dd ee\"");
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java Tue Mar 6 23:17:08 2012
@@ -32,7 +32,7 @@ public class TestQueryParser extends Que
if (a == null)
a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", a);
- qp.setDefaultOperator(QueryParser.OR_OPERATOR);
+ qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
return qp;
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java Tue Mar 6 23:17:08 2012
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.queryparser.util.QueryParserTestBase;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -47,7 +48,7 @@ public class TestExtendableQueryParser e
QueryParser qp = extensions == null ? new ExtendableQueryParser(
TEST_VERSION_CURRENT, "field", a) : new ExtendableQueryParser(
TEST_VERSION_CURRENT, "field", a, extensions);
- qp.setDefaultOperator(QueryParser.OR_OPERATOR);
+ qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
return qp;
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java Tue Mar 6 23:17:08 2012
@@ -38,6 +38,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.CharStream;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.queryparser.classic.QueryParserTokenManager;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -160,7 +161,7 @@ public abstract class QueryParserTestBas
public void assertEscapedQueryEquals(String query, Analyzer a, String result)
throws Exception {
- String escapedQuery = QueryParser.escape(query);
+ String escapedQuery = QueryParserBase.escape(query);
if (!escapedQuery.equals(result)) {
fail("Query /" + query + "/ yielded /" + escapedQuery
+ "/, expecting /" + result + "/");
@@ -200,7 +201,7 @@ public abstract class QueryParserTestBas
if (a == null)
a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", a);
- qp.setDefaultOperator(QueryParser.AND_OPERATOR);
+ qp.setDefaultOperator(QueryParserBase.AND_OPERATOR);
return qp.parse(query);
}
@@ -382,11 +383,11 @@ public abstract class QueryParserTestBas
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random));
// make sure OR is the default:
- assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator());
- qp.setDefaultOperator(QueryParser.AND_OPERATOR);
- assertEquals(QueryParser.AND_OPERATOR, qp.getDefaultOperator());
- qp.setDefaultOperator(QueryParser.OR_OPERATOR);
- assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator());
+ assertEquals(QueryParserBase.OR_OPERATOR, qp.getDefaultOperator());
+ qp.setDefaultOperator(QueryParserBase.AND_OPERATOR);
+ assertEquals(QueryParserBase.AND_OPERATOR, qp.getDefaultOperator());
+ qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
+ assertEquals(QueryParserBase.OR_OPERATOR, qp.getDefaultOperator());
}
public void testPunct() throws Exception {
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java Tue Mar 6 23:17:08 2012
@@ -18,7 +18,6 @@ package org.apache.lucene.queryparser.xm
*/
import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
@@ -79,7 +78,7 @@ public class TestNumericRangeFilterBuild
}
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked","rawtypes"})
public void testGetFilterInt() throws Exception {
NumericRangeFilterBuilder filterBuilder = new NumericRangeFilterBuilder();
filterBuilder.setStrictMode(true);
@@ -99,7 +98,7 @@ public class TestNumericRangeFilterBuild
String xml2 = "<NumericRangeFilter fieldName='AGE' type='int' lowerTerm='-1' upperTerm='10' includeUpper='false'/>";
Document doc2 = getDocumentFromString(xml2);
Filter filter2 = filterBuilder.getFilter(doc2.getDocumentElement());
- assertTrue(filter2 instanceof NumericRangeFilter<?>);
+ assertTrue(filter2 instanceof NumericRangeFilter);
NumericRangeFilter<Integer> numRangeFilter2 = (NumericRangeFilter) filter2;
assertEquals(Integer.valueOf(-1), numRangeFilter2.getMin());
@@ -109,7 +108,7 @@ public class TestNumericRangeFilterBuild
assertFalse(numRangeFilter2.includesMax());
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked","rawtypes"})
public void testGetFilterLong() throws Exception {
NumericRangeFilterBuilder filterBuilder = new NumericRangeFilterBuilder();
filterBuilder.setStrictMode(true);
@@ -138,7 +137,7 @@ public class TestNumericRangeFilterBuild
assertFalse(numRangeFilter2.includesMax());
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked","rawtypes"})
public void testGetFilterDouble() throws Exception {
NumericRangeFilterBuilder filterBuilder = new NumericRangeFilterBuilder();
filterBuilder.setStrictMode(true);
@@ -169,7 +168,7 @@ public class TestNumericRangeFilterBuild
assertFalse(numRangeFilter2.includesMax());
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked","rawtypes"})
public void testGetFilterFloat() throws Exception {
NumericRangeFilterBuilder filterBuilder = new NumericRangeFilterBuilder();
filterBuilder.setStrictMode(true);
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeQueryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeQueryBuilder.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeQueryBuilder.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeQueryBuilder.java Tue Mar 6 23:17:08 2012
@@ -46,7 +46,7 @@ public class TestNumericRangeQueryBuilde
fail("Expected to throw " + ParserException.class);
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked","rawtypes"})
public void testGetFilterInt() throws Exception {
NumericRangeQueryBuilder filterBuilder = new NumericRangeQueryBuilder();
@@ -75,7 +75,7 @@ public class TestNumericRangeQueryBuilde
assertFalse(numRangeFilter2.includesMax());
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked","rawtypes"})
public void testGetFilterLong() throws Exception {
NumericRangeQueryBuilder filterBuilder = new NumericRangeQueryBuilder();
@@ -103,7 +103,7 @@ public class TestNumericRangeQueryBuilde
assertFalse(numRangeFilter2.includesMax());
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked","rawtypes"})
public void testGetFilterDouble() throws Exception {
NumericRangeQueryBuilder filterBuilder = new NumericRangeQueryBuilder();
@@ -133,7 +133,7 @@ public class TestNumericRangeQueryBuilde
assertFalse(numRangeFilter2.includesMax());
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked","rawtypes"})
public void testGetFilterFloat() throws Exception {
NumericRangeQueryBuilder filterBuilder = new NumericRangeQueryBuilder();
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java Tue Mar 6 23:17:08 2012
@@ -16,7 +16,8 @@ package org.apache.lucene.search.spell;
* limitations under the License.
*/
-import java.util.Iterator;
+import java.io.IOException;
+import org.apache.lucene.util.BytesRefIterator;
/**
* A simple interface representing a Dictionary. A Dictionary
@@ -30,5 +31,5 @@ public interface Dictionary {
* Return all words present in the dictionary
* @return Iterator
*/
- Iterator<String> getWordsIterator();
+ BytesRefIterator getWordsIterator() throws IOException;
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java Tue Mar 6 23:17:08 2012
@@ -18,15 +18,14 @@
package org.apache.lucene.search.spell;
import java.io.IOException;
-import java.util.Iterator;
+import java.util.Comparator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.UnicodeUtil;
/**
* HighFrequencyDictionary: terms taken from the given field
@@ -42,7 +41,6 @@ public class HighFrequencyDictionary imp
private IndexReader reader;
private String field;
private float thresh;
- private final CharsRef spare = new CharsRef();
public HighFrequencyDictionary(IndexReader reader, String field, float thresh) {
this.reader = reader;
@@ -50,90 +48,56 @@ public class HighFrequencyDictionary imp
this.thresh = thresh;
}
- public final Iterator<String> getWordsIterator() {
+ public final BytesRefIterator getWordsIterator() throws IOException {
return new HighFrequencyIterator();
}
- final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
- private TermsEnum termsEnum;
- private BytesRef actualTerm;
- private boolean hasNextCalled;
+ final class HighFrequencyIterator implements TermFreqIterator {
+ private final BytesRef spare = new BytesRef();
+ private final TermsEnum termsEnum;
private int minNumDocs;
+ private long freq;
- HighFrequencyIterator() {
- try {
- Terms terms = MultiFields.getTerms(reader, field);
- if (terms != null) {
- termsEnum = terms.iterator(null);
- }
- minNumDocs = (int)(thresh * (float)reader.numDocs());
- } catch (IOException e) {
- throw new RuntimeException(e);
+ HighFrequencyIterator() throws IOException {
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ termsEnum = terms.iterator(null);
+ } else {
+ termsEnum = null;
}
+ minNumDocs = (int)(thresh * (float)reader.numDocs());
}
private boolean isFrequent(int freq) {
return freq >= minNumDocs;
}
- public float freq() {
- try {
- return termsEnum.docFreq();
- } catch (IOException ioe) {
- throw new RuntimeException(ioe);
- }
+ public long weight() {
+ return freq;
}
-
- public String next() {
- if (!hasNextCalled && !hasNext()) {
- return null;
- }
- hasNextCalled = false;
-
- if (actualTerm == null) {
- return null;
- } else {
- UnicodeUtil.UTF8toUTF16(actualTerm, spare);
- return spare.toString();
- }
- }
-
- public boolean hasNext() {
- if (hasNextCalled) {
- return actualTerm != null;
- }
- hasNextCalled = true;
- if (termsEnum == null) {
- return false;
- }
-
- while(true) {
-
- try {
- actualTerm = termsEnum.next();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
-
- // if there are no words return false
- if (actualTerm == null) {
- return false;
- }
-
- // got a valid term, does it pass the threshold?
- try {
+ @Override
+ public BytesRef next() throws IOException {
+ if (termsEnum != null) {
+ BytesRef next;
+ while((next = termsEnum.next()) != null) {
if (isFrequent(termsEnum.docFreq())) {
- return true;
+ freq = termsEnum.docFreq();
+ spare.copyBytes(next);
+ return spare;
}
- } catch (IOException ioe) {
- throw new RuntimeException(ioe);
}
}
+ return null;
}
- public void remove() {
- throw new UnsupportedOperationException();
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ if (termsEnum == null) {
+ return null;
+ } else {
+ return termsEnum.getComparator();
+ }
}
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java Tue Mar 6 23:17:08 2012
@@ -18,13 +18,7 @@ package org.apache.lucene.search.spell;
*/
import org.apache.lucene.index.IndexReader;
-
-import java.util.Iterator;
-
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CharsRef;
-import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
@@ -49,50 +43,13 @@ public class LuceneDictionary implements
this.field = field;
}
- public final Iterator<String> getWordsIterator() {
- return new LuceneIterator();
- }
-
-
- final class LuceneIterator implements Iterator<String> {
- private TermsEnum termsEnum;
- private BytesRef pendingTerm;
- private final CharsRef spare = new CharsRef();
-
- LuceneIterator() {
- try {
- final Terms terms = MultiFields.getTerms(reader, field);
- if (terms != null) {
- termsEnum = terms.iterator(null);
- pendingTerm = termsEnum.next();
- }
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- public String next() {
- if (pendingTerm == null) {
- return null;
- }
-
- UnicodeUtil.UTF8toUTF16(pendingTerm, spare);
-
- try {
- pendingTerm = termsEnum.next();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
-
- return spare.toString();
- }
-
- public boolean hasNext() {
- return pendingTerm != null;
- }
-
- public void remove() {
- throw new UnsupportedOperationException();
+ public final BytesRefIterator getWordsIterator() throws IOException {
+ final Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ return terms.iterator(null);
+ } else {
+ return BytesRefIterator.EMPTY_ITERATOR;
}
}
+
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java Tue Mar 6 23:17:08 2012
@@ -18,9 +18,13 @@ package org.apache.lucene.search.spell;
*/
-import java.util.Iterator;
+import java.util.Comparator;
import java.io.*;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.IOUtils;
+
/**
* Dictionary represented by a text file.
@@ -33,8 +37,6 @@ import java.io.*;
public class PlainTextDictionary implements Dictionary {
private BufferedReader in;
- private String line;
- private boolean hasNextCalled;
public PlainTextDictionary(File file) throws FileNotFoundException {
in = new BufferedReader(new FileReader(file));
@@ -51,31 +53,42 @@ public class PlainTextDictionary impleme
in = new BufferedReader(reader);
}
- public Iterator<String> getWordsIterator() {
- return new fileIterator();
+ public BytesRefIterator getWordsIterator() throws IOException {
+ return new FileIterator();
}
- final class fileIterator implements Iterator<String> {
- public String next() {
- if (!hasNextCalled) {
- hasNext();
+ final class FileIterator implements BytesRefIterator {
+ private boolean done = false;
+ private final BytesRef spare = new BytesRef();
+ @Override
+ public BytesRef next() throws IOException {
+ if (done) {
+ return null;
}
- hasNextCalled = false;
- return line;
- }
-
- public boolean hasNext() {
- hasNextCalled = true;
+ boolean success = false;
+ BytesRef result;
try {
- line = in.readLine();
- } catch (IOException ex) {
- throw new RuntimeException(ex);
+ String line;
+ if ((line = in.readLine()) != null) {
+ spare.copyChars(line);
+ result = spare;
+ } else {
+ done = true;
+ IOUtils.close(in);
+ result = null;
+ }
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(in);
+ }
}
- return (line != null) ? true : false;
+ return result;
}
-
- public void remove() {
- throw new UnsupportedOperationException();
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return null;
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java Tue Mar 6 23:17:08 2012
@@ -46,6 +46,7 @@ import org.apache.lucene.search.TermQuer
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;
@@ -510,20 +511,18 @@ public class SpellChecker implements jav
boolean isEmpty = termsEnums.isEmpty();
try {
- Iterator<String> iter = dict.getWordsIterator();
- BytesRef currentTerm = new BytesRef();
+ BytesRefIterator iter = dict.getWordsIterator();
+ BytesRef currentTerm;
- terms: while (iter.hasNext()) {
- String word = iter.next();
+ terms: while ((currentTerm = iter.next()) != null) {
+ String word = currentTerm.utf8ToString();
int len = word.length();
if (len < 3) {
continue; // too short we bail but "too long" is fine...
}
if (!isEmpty) {
- // we have a non-empty index, check if the term exists
- currentTerm.copyChars(word);
for (TermsEnum te : termsEnums) {
if (te.seekExact(currentTerm, false)) {
continue terms;
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java Tue Mar 6 23:17:08 2012
@@ -17,34 +17,34 @@ package org.apache.lucene.search.spell;
* limitations under the License.
*/
-import java.util.Iterator;
+import java.io.IOException;
+import java.util.Comparator;
-public interface TermFreqIterator extends Iterator<String> {
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
- public float freq();
+public interface TermFreqIterator extends BytesRefIterator {
+
+ public long weight();
public static class TermFreqIteratorWrapper implements TermFreqIterator {
- private Iterator<String> wrapped;
+ private BytesRefIterator wrapped;
- public TermFreqIteratorWrapper(Iterator<String> wrapped) {
+ public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
this.wrapped = wrapped;
}
- public float freq() {
- return 1.0f;
- }
-
- public boolean hasNext() {
- return wrapped.hasNext();
+ public long weight() {
+ return 1;
}
- public String next() {
- return wrapped.next().toString();
+ public BytesRef next() throws IOException {
+ return wrapped.next();
}
- public void remove() {
- throw new UnsupportedOperationException();
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return wrapped.getComparator();
}
-
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java Tue Mar 6 23:17:08 2012
@@ -17,65 +17,54 @@ package org.apache.lucene.search.suggest
* limitations under the License.
*/
-import java.util.ArrayList;
-import java.util.List;
-
+import java.io.IOException;
+import java.util.Comparator;
import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
/**
* This wrapper buffers incoming elements.
+ * @lucene.experimental
*/
public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
-
- /** Entry in the buffer. */
- public static final class Entry implements Comparable<Entry> {
- String word;
- float freq;
-
- public Entry(String word, float freq) {
- this.word = word;
- this.freq = freq;
- }
-
- public int compareTo(Entry o) {
- return word.compareTo(o.word);
- }
- }
-
- protected ArrayList<Entry> entries = new ArrayList<Entry>();
-
- protected int curPos;
- protected Entry curEntry;
-
- public BufferingTermFreqIteratorWrapper(TermFreqIterator source) {
- // read all source data into buffer
- while (source.hasNext()) {
- String w = source.next();
- Entry e = new Entry(w, source.freq());
- entries.add(e);
+ // TODO keep this for now
+ protected BytesRefList entries = new BytesRefList();
+ protected int curPos = -1;
+ protected long[] freqs = new long[1];
+ private final BytesRef spare = new BytesRef();
+ private final Comparator<BytesRef> comp;
+ public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
+ this.comp = source.getComparator();
+ BytesRef spare;
+ int freqIndex = 0;
+ while((spare = source.next()) != null) {
+ entries.append(spare);
+ if (freqIndex >= freqs.length) {
+ freqs = ArrayUtil.grow(freqs, freqs.length+1);
+ }
+ freqs[freqIndex++] = source.weight();
}
- curPos = 0;
+
}
- public float freq() {
- return curEntry.freq;
+ public long weight() {
+ return freqs[curPos];
}
- public boolean hasNext() {
- return curPos < entries.size();
+ @Override
+ public BytesRef next() throws IOException {
+ if (++curPos < entries.size()) {
+ entries.get(spare, curPos);
+ return spare;
+ }
+ return null;
}
- public String next() {
- curEntry = entries.get(curPos);
- curPos++;
- return curEntry.word;
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return comp;
}
- public void remove() {
- throw new UnsupportedOperationException("remove is not supported");
- }
-
- public List<Entry> entries() {
- return entries;
- }
+
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java Tue Mar 6 23:17:08 2012
@@ -19,9 +19,12 @@ package org.apache.lucene.search.suggest
import java.io.*;
+import java.util.Comparator;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
/**
@@ -36,7 +39,7 @@ public class FileDictionary implements D
private BufferedReader in;
private String line;
- private boolean hasNextCalled;
+ private boolean done = false;
public FileDictionary(InputStream dictFile) {
in = new BufferedReader(new InputStreamReader(dictFile));
@@ -50,45 +53,49 @@ public class FileDictionary implements D
}
public TermFreqIterator getWordsIterator() {
- return new fileIterator();
+ return new FileIterator();
}
- final class fileIterator implements TermFreqIterator {
- private float curFreq;
+ final class FileIterator implements TermFreqIterator {
+ private long curFreq;
+ private final BytesRef spare = new BytesRef();
- public String next() {
- if (!hasNextCalled) {
- hasNext();
- }
- hasNextCalled = false;
- return line;
- }
-
- public float freq() {
+
+ public long weight() {
return curFreq;
}
- public boolean hasNext() {
- hasNextCalled = true;
- try {
- line = in.readLine();
- if (line != null) {
- String[] fields = line.split("\t");
- if (fields.length > 1) {
- curFreq = Float.parseFloat(fields[1]);
- line = fields[0];
- } else {
- curFreq = 1;
+ @Override
+ public BytesRef next() throws IOException {
+ if (done) {
+ return null;
+ }
+ line = in.readLine();
+ if (line != null) {
+ String[] fields = line.split("\t");
+ if (fields.length > 1) {
+ // keep accepting float-formatted weights for backwards compatibility
+ try {
+ curFreq = Long.parseLong(fields[1]);
+ } catch (NumberFormatException e) {
+ curFreq = (long)Double.parseDouble(fields[1]);
}
+ spare.copyChars(fields[0]);
+ } else {
+ spare.copyChars(line);
+ curFreq = 1;
}
- } catch (IOException ex) {
- throw new RuntimeException(ex);
+ return spare;
+ } else {
+ done = true;
+ IOUtils.close(in);
+ return null;
}
- return (line != null) ? true : false;
}
- public void remove() {
- throw new UnsupportedOperationException();
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return null;
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java Tue Mar 6 23:17:08 2012
@@ -19,22 +19,29 @@ package org.apache.lucene.search.suggest
import java.io.File;
import java.io.IOException;
-import java.util.Iterator;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;
+/**
+ * Simple Lookup interface for {@link CharSequence} suggestions.
+ * @lucene.experimental
+ */
public abstract class Lookup {
/**
* Result of a lookup.
*/
public static final class LookupResult implements Comparable<LookupResult> {
- public final String key;
- public final float value;
+ public final CharSequence key;
+ public final long value;
- public LookupResult(String key, float value) {
+ public LookupResult(CharSequence key, long value) {
this.key = key;
this.value = value;
}
@@ -46,10 +53,32 @@ public abstract class Lookup {
/** Compare alphabetically. */
public int compareTo(LookupResult o) {
- return this.key.compareTo(o.key);
+ return CHARSEQUENCE_COMPARATOR.compare(key, o.key);
}
}
+ public static final Comparator<CharSequence> CHARSEQUENCE_COMPARATOR = new CharSequenceComparator();
+
+ private static class CharSequenceComparator implements Comparator<CharSequence> {
+
+ @Override
+ public int compare(CharSequence o1, CharSequence o2) {
+ final int l1 = o1.length();
+ final int l2 = o2.length();
+
+ final int aStop = Math.min(l1, l2);
+ for (int i = 0; i < aStop; i++) {
+ int diff = o1.charAt(i) - o2.charAt(i);
+ if (diff != 0) {
+ return diff;
+ }
+ }
+ // One is a prefix of the other, or, they are equal:
+ return l1 - l2;
+ }
+
+ }
+
public static final class LookupPriorityQueue extends PriorityQueue<LookupResult> {
public LookupPriorityQueue(int size) {
@@ -77,7 +106,7 @@ public abstract class Lookup {
* {@link UnsortedTermFreqIteratorWrapper} in such case.
*/
public void build(Dictionary dict) throws IOException {
- Iterator<String> it = dict.getWordsIterator();
+ BytesRefIterator it = dict.getWordsIterator();
TermFreqIterator tfit;
if (it instanceof TermFreqIterator) {
tfit = (TermFreqIterator)it;
@@ -87,48 +116,55 @@ public abstract class Lookup {
build(tfit);
}
+ /**
+ * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}.
+ * The implementation might re-sort the data internally.
+ */
public abstract void build(TermFreqIterator tfit) throws IOException;
/**
+ * Look up a key and return possible completions for this key.
+ * @param key lookup key. Depending on the implementation this may be
+ * a prefix, misspelling, or even infix.
+ * @param onlyMorePopular return only more popular results
+ * @param num maximum number of results to return
+ * @return a list of possible completions, with their relative weight (e.g. popularity)
+ */
+ public abstract List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num);
+
+
+ /**
* Persist the constructed lookup data to a directory. Optional operation.
- * @param storeDir directory where data can be stored.
+ * @param output {@link OutputStream} to write the data to.
* @return true if successful, false if unsuccessful or not supported.
* @throws IOException when fatal IO error occurs.
*/
- public abstract boolean store(File storeDir) throws IOException;
+ public abstract boolean store(OutputStream output) throws IOException;
/**
* Discard current lookup data and load it from a previously saved copy.
* Optional operation.
- * @param storeDir directory where lookup data was stored.
+ * @param input the {@link InputStream} to load the lookup data.
* @return true if completed successfully, false if unsuccessful or not supported.
* @throws IOException when fatal IO error occurs.
*/
- public abstract boolean load(File storeDir) throws IOException;
+ public abstract boolean load(InputStream input) throws IOException;
/**
- * Look up a key and return possible completion for this key.
- * @param key lookup key. Depending on the implementation this may be
- * a prefix, misspelling, or even infix.
- * @param onlyMorePopular return only more popular results
- * @param num maximum number of results to return
- * @return a list of possible completions, with their relative weight (e.g. popularity)
+ * Persist the constructed lookup data to a directory. Optional operation.
+ * @param storeDir directory where data can be stored.
+ * @return true if successful, false if unsuccessful or not supported.
+ * @throws IOException when fatal IO error occurs.
*/
- public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
+ public abstract boolean store(File storeDir) throws IOException;
/**
- * Modify the lookup data by recording additional data. Optional operation.
- * @param key new lookup key
- * @param value value to associate with this key
- * @return true if new key is added, false if it already exists or operation
- * is not supported.
+ * Discard current lookup data and load it from a previously saved copy.
+ * Optional operation.
+ * @param storeDir directory where lookup data was stored.
+ * @return true if completed successfully, false if unsuccessful or not supported.
+ * @throws IOException when fatal IO error occurs.
*/
- public abstract boolean add(String key, Object value);
+ public abstract boolean load(File storeDir) throws IOException;
- /**
- * Get value associated with a specific key.
- * @param key lookup key
- * @return associated value
- */
- public abstract Object get(String key);
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java Tue Mar 6 23:17:08 2012
@@ -17,19 +17,166 @@ package org.apache.lucene.search.suggest
* limitations under the License.
*/
-import java.util.Collections;
+import java.io.File;
+import java.io.IOException;
+import java.util.Comparator;
-import org.apache.lucene.search.spell.SortedIterator;
import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.suggest.fst.Sort;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
/**
- * This wrapper buffers incoming elements and makes sure they are sorted in
- * ascending lexicographic order.
+ * This wrapper buffers incoming elements and makes sure they are sorted based on the given comparator.
+ * @lucene.experimental
*/
-public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator {
+public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
+
+ private final TermFreqIterator source;
+ private File tempInput;
+ private File tempSorted;
+ private final ByteSequencesReader reader;
+ private boolean done = false;
+
+ private long weight;
+ private final BytesRef scratch = new BytesRef();
+ private final Comparator<BytesRef> comparator;
+
+ public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
+ this(source, comparator, false);
+ }
+
+ public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator, boolean compareRawBytes) throws IOException {
+ this.source = source;
+ this.comparator = comparator;
+ this.reader = sort(compareRawBytes ? comparator : new BytesOnlyComparator(this.comparator));
+ }
+
+ @Override
+ public BytesRef next() throws IOException {
+ boolean success = false;
+ if (done) {
+ return null;
+ }
+ try {
+ ByteArrayDataInput input = new ByteArrayDataInput();
+ if (reader.read(scratch)) {
+ weight = decode(scratch, input);
+ success = true;
+ return scratch;
+ }
+ close();
+ success = done = true;
+ return null;
+ } finally {
+ if (!success) {
+ done = true;
+ close();
+ }
+ }
+ }
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return comparator;
+ }
+
+ @Override
+ public long weight() {
+ return weight;
+ }
+
+ private Sort.ByteSequencesReader sort(Comparator<BytesRef> comparator) throws IOException {
+ String prefix = getClass().getSimpleName();
+ File directory = Sort.defaultTempDir();
+ tempInput = File.createTempFile(prefix, ".input", directory);
+ tempSorted = File.createTempFile(prefix, ".sorted", directory);
+
+ final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
+ boolean success = false;
+ try {
+ BytesRef spare;
+ byte[] buffer = new byte[0];
+ ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
- public SortedTermFreqIteratorWrapper(TermFreqIterator source) {
- super(source);
- Collections.sort(entries);
+ while ((spare = source.next()) != null) {
+ encode(writer, output, buffer, spare, source.weight());
+ }
+ writer.close();
+ new Sort(comparator).sort(tempInput, tempSorted);
+ ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted);
+ success = true;
+ return reader;
+
+ } finally {
+ if (success) {
+ IOUtils.close(writer);
+ } else {
+ try {
+ IOUtils.closeWhileHandlingException(writer);
+ } finally {
+ close();
+ }
+ }
+
+ }
+ }
+
+ private void close() throws IOException {
+ if (tempInput != null) {
+ tempInput.delete();
+ }
+ if (tempSorted != null) {
+ tempSorted.delete();
+ }
+ IOUtils.close(reader);
+ }
+
+ private final static class BytesOnlyComparator implements Comparator<BytesRef> {
+
+ final Comparator<BytesRef> other;
+ private final BytesRef leftScratch = new BytesRef();
+ private final BytesRef rightScratch = new BytesRef();
+
+ public BytesOnlyComparator(Comparator<BytesRef> other) {
+ this.other = other;
+ }
+
+ @Override
+ public int compare(BytesRef left, BytesRef right) {
+ wrap(leftScratch, left);
+ wrap(rightScratch, right);
+ return other.compare(leftScratch, rightScratch);
+ }
+
+ private void wrap(BytesRef wrapper, BytesRef source) {
+ wrapper.bytes = source.bytes;
+ wrapper.offset = source.offset;
+ wrapper.length = source.length - 8;
+
+ }
+ }
+
+ protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
+ if (spare.length + 8 >= buffer.length) {
+ buffer = ArrayUtil.grow(buffer, spare.length + 8);
+ }
+ output.reset(buffer);
+ output.writeBytes(spare.bytes, spare.offset, spare.length);
+ output.writeLong(weight);
+ writer.write(buffer, 0, output.getPosition());
+ }
+
+ protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
+ tmpInput.reset(scratch.bytes);
+ tmpInput.skipBytes(scratch.length - 8); // suggestion + separator
+ scratch.length -= 8; // sep + long
+ return tmpInput.readLong();
}
+
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java Tue Mar 6 23:17:08 2012
@@ -17,18 +17,47 @@ package org.apache.lucene.search.suggest
* limitations under the License.
*/
-import java.util.Collections;
+import java.io.IOException;
+import java.util.Random;
import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.BytesRef;
/**
* This wrapper buffers the incoming elements and makes sure they are in
* random order.
+ * @lucene.experimental
*/
public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
-
- public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) {
+ // TODO keep this for now
+ private final int[] ords;
+ private int currentOrd = -1;
+ private final BytesRef spare = new BytesRef();
+ public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
super(source);
- Collections.shuffle(entries);
+ ords = new int[entries.size()];
+ Random random = new Random();
+ for (int i = 0; i < ords.length; i++) {
+ ords[i] = i;
+ }
+ for (int i = 0; i < ords.length; i++) {
+ int randomPosition = random.nextInt(ords.length);
+ int temp = ords[i];
+ ords[i] = ords[randomPosition];
+ ords[randomPosition] = temp;
+ }
+ }
+
+ @Override
+ public long weight() {
+ return freqs[currentOrd];
+ }
+
+ @Override
+ public BytesRef next() throws IOException {
+ if (++curPos < entries.size()) {
+ return entries.get(spare, (currentOrd = ords[curPos]));
+ }
+ return null;
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java Tue Mar 6 23:17:08 2012
@@ -18,13 +18,16 @@ package org.apache.lucene.search.suggest
*/
import java.io.IOException;
-import java.util.Iterator;
+import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
/**
* Collects {@link BytesRef} and then allows one to iterate over their sorted order. Implementations
- * of this interface will be called in a single-threaded scenario.
+ * of this interface will be called in a single-threaded scenario.
+ * @lucene.experimental
+ * @lucene.internal
*/
public interface BytesRefSorter {
/**
@@ -42,5 +45,7 @@ public interface BytesRefSorter {
*
* @throws IOException If an I/O exception occurs.
*/
- Iterator<BytesRef> iterator() throws IOException;
+ BytesRefIterator iterator() throws IOException;
+
+ Comparator<BytesRef> getComparator();
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java Tue Mar 6 23:17:08 2012
@@ -18,59 +18,63 @@ package org.apache.lucene.search.suggest
*/
import java.io.*;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
+import java.util.Comparator;
import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.IOUtils;
/**
* Builds and iterates over sequences stored on disk.
+ * @lucene.experimental
+ * @lucene.internal
*/
public class ExternalRefSorter implements BytesRefSorter, Closeable {
private final Sort sort;
private Sort.ByteSequencesWriter writer;
private File input;
- private File sorted;
-
+ private File sorted;
+
/**
* Will buffer all sequences to a temporary file and then sort (all on-disk).
*/
public ExternalRefSorter(Sort sort) throws IOException {
this.sort = sort;
- this.input = File.createTempFile("RefSorter-", ".raw", Sort.defaultTempDir());
+ this.input = File.createTempFile("RefSorter-", ".raw",
+ Sort.defaultTempDir());
this.writer = new Sort.ByteSequencesWriter(input);
}
-
+
@Override
public void add(BytesRef utf8) throws IOException {
- if (writer == null)
- throw new IllegalStateException();
+ if (writer == null) throw new IllegalStateException();
writer.write(utf8);
}
-
- @Override
- public Iterator<BytesRef> iterator() throws IOException {
+
+ public BytesRefIterator iterator() throws IOException {
if (sorted == null) {
closeWriter();
-
- sorted = File.createTempFile("RefSorter-", ".sorted", Sort.defaultTempDir());
+
+ sorted = File.createTempFile("RefSorter-", ".sorted",
+ Sort.defaultTempDir());
sort.sort(input, sorted);
-
+
input.delete();
input = null;
}
-
- return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted));
+
+ return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted),
+ sort.getComparator());
}
-
+
private void closeWriter() throws IOException {
if (writer != null) {
writer.close();
writer = null;
}
}
-
+
/**
* Removes any written temporary files.
*/
@@ -83,40 +87,54 @@ public class ExternalRefSorter implement
if (sorted != null) sorted.delete();
}
}
-
+
/**
* Iterate over byte refs in a file.
*/
- class ByteSequenceIterator implements Iterator<BytesRef> {
- private ByteSequencesReader reader;
- private byte[] next;
-
- public ByteSequenceIterator(ByteSequencesReader reader) throws IOException {
+ class ByteSequenceIterator implements BytesRefIterator {
+ private final ByteSequencesReader reader;
+ private BytesRef scratch = new BytesRef();
+ private final Comparator<BytesRef> comparator;
+
+ public ByteSequenceIterator(ByteSequencesReader reader,
+ Comparator<BytesRef> comparator) {
this.reader = reader;
- this.next = reader.read();
- }
-
- @Override
- public boolean hasNext() {
- return next != null;
+ this.comparator = comparator;
}
@Override
- public BytesRef next() {
- if (next == null) throw new NoSuchElementException();
- BytesRef r = new BytesRef(next);
+ public BytesRef next() throws IOException {
+ if (scratch == null) {
+ return null;
+ }
+ boolean success = false;
try {
- next = reader.read();
- if (next == null) {
- reader.close();
+ byte[] next = reader.read();
+ if (next != null) {
+ scratch.bytes = next;
+ scratch.length = next.length;
+ scratch.offset = 0;
+ } else {
+ IOUtils.close(reader);
+ scratch = null;
+ }
+ success = true;
+ return scratch;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(reader);
}
- } catch (IOException e) {
- throw new RuntimeException(e);
}
- return r;
}
-
+
@Override
- public void remove() { throw new UnsupportedOperationException(); }
+ public Comparator<BytesRef> getComparator() {
+ return comparator;
+ }
+ }
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return sort.getComparator();
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java Tue Mar 6 23:17:08 2012
@@ -28,6 +28,7 @@ import org.apache.lucene.util.fst.FST.Ar
* Finite state automata based implementation of "autocomplete" functionality.
*
* @see FSTCompletionBuilder
+ * @lucene.experimental
*/
// TODO: we could store exact weights as outputs from the FST (int4 encoded
@@ -159,10 +160,10 @@ public class FSTCompletion {
* @param utf8
* The sequence of utf8 bytes to follow.
*
- * @return Returns the bucket number of the match or <code>null</code> if no
+ * @return Returns the bucket number of the match or <code>-1</code> if no
* match was found.
*/
- private Integer getExactMatchStartingFromRootArc(
+ private int getExactMatchStartingFromRootArc(
int rootArcIndex, BytesRef utf8) {
// Get the UTF-8 bytes representation of the input key.
try {
@@ -186,7 +187,7 @@ public class FSTCompletion {
}
// No match.
- return null;
+ return -1;
}
/**
@@ -199,7 +200,7 @@ public class FSTCompletion {
* @return Returns the suggestions, sorted by their approximated weight first
* (decreasing) and then alphabetically (UTF-8 codepoint order).
*/
- public List<Completion> lookup(String key, int num) {
+ public List<Completion> lookup(CharSequence key, int num) {
if (key.length() == 0 || automaton == null) {
return EMPTY_RESULT;
}
@@ -273,8 +274,8 @@ public class FSTCompletion {
// exact match, if requested.
if (exactFirst) {
if (!checkExistingAndReorder(res, key)) {
- Integer exactMatchBucket = getExactMatchStartingFromRootArc(i, key);
- if (exactMatchBucket != null) {
+ int exactMatchBucket = getExactMatchStartingFromRootArc(i, key);
+ if (exactMatchBucket != -1) {
// Insert as the first result and truncate at num.
while (res.size() >= num) {
res.remove(res.size() - 1);
@@ -385,10 +386,10 @@ public class FSTCompletion {
}
/**
- * Returns the bucket assigned to a given key (if found) or <code>null</code> if
+ * Returns the bucket assigned to a given key (if found) or <code>-1</code> if
* no exact match exists.
*/
- public Integer getBucket(String key) {
+ public int getBucket(CharSequence key) {
return getExactMatchStartingFromRootArc(0, new BytesRef(key));
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java Tue Mar 6 23:17:08 2012
@@ -19,9 +19,9 @@ package org.apache.lucene.search.suggest
import java.io.Closeable;
import java.io.IOException;
-import java.util.Iterator;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.*;
@@ -46,7 +46,7 @@ import org.apache.lucene.util.fst.*;
* </ul>
*
* <p>
- * At runtime, in {@link FSTCompletion#lookup(String, int)},
+ * At runtime, in {@link FSTCompletion#lookup(CharSequence, int)},
* the automaton is utilized as follows:
* <ul>
* <li>For each possible term weight encoded in the automaton (cached arcs from
@@ -98,6 +98,7 @@ import org.apache.lucene.util.fst.*;
* change, requiring you to rebuild the FST suggest index.
*
* @see FSTCompletion
+ * @lucene.experimental
*/
public class FSTCompletionBuilder {
/**
@@ -143,10 +144,11 @@ public class FSTCompletionBuilder {
/**
* Creates an {@link FSTCompletion} with default options: 10 buckets, exact match
- * promoted to first position and {@link InMemorySorter}.
+ * promoted to first position and {@link InMemorySorter} with a comparator obtained from
+ * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
*/
public FSTCompletionBuilder() {
- this(DEFAULT_BUCKETS, new InMemorySorter(), Integer.MAX_VALUE);
+ this(DEFAULT_BUCKETS, new InMemorySorter(BytesRef.getUTF8SortedAsUnicodeComparator()), Integer.MAX_VALUE);
}
/**
@@ -237,10 +239,12 @@ public class FSTCompletionBuilder {
shareMaxTailLength, outputs, null, false);
BytesRef scratch = new BytesRef();
+ BytesRef entry;
final IntsRef scratchIntsRef = new IntsRef();
int count = 0;
- for (Iterator<BytesRef> i = sorter.iterator(); i.hasNext(); count++) {
- BytesRef entry = i.next();
+ BytesRefIterator iter = sorter.iterator();
+ while((entry = iter.next()) != null) {
+ count++;
if (scratch.compareTo(entry) != 0) {
builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
scratch.copyBytes(entry);
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java Tue Mar 6 23:17:08 2012
@@ -19,6 +19,8 @@ package org.apache.lucene.search.suggest
import java.io.File;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
@@ -29,6 +31,8 @@ import org.apache.lucene.search.suggest.
import org.apache.lucene.search.suggest.tst.TSTLookup;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.*;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
@@ -37,7 +41,7 @@ import org.apache.lucene.util.fst.NoOutp
* An adapter from {@link Lookup} API to {@link FSTCompletion}.
*
* <p>This adapter differs from {@link FSTCompletion} in that it attempts
- * to discretize any "weights" as passed from in {@link TermFreqIterator#freq()}
+ * to discretize any "weights" as passed in from {@link TermFreqIterator#weight()}
* to match the number of buckets. For the rationale for bucketing, see
* {@link FSTCompletion}.
*
@@ -55,6 +59,7 @@ import org.apache.lucene.util.fst.NoOutp
* use {@link FSTCompletion} directly or {@link TSTLookup}, for example.
*
* @see FSTCompletion
+ * @lucene.experimental
*/
public class FSTCompletionLookup extends Lookup {
/**
@@ -158,20 +163,17 @@ public class FSTCompletionLookup extends
// If negative floats are allowed some trickery needs to be done to find their byte order.
boolean success = false;
try {
- BytesRef tmp1 = new BytesRef();
byte [] buffer = new byte [0];
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
- while (tfit.hasNext()) {
- String key = tfit.next();
- UnicodeUtil.UTF16toUTF8(key, 0, key.length(), tmp1);
-
- if (tmp1.length + 4 >= buffer.length) {
- buffer = ArrayUtil.grow(buffer, tmp1.length + 4);
+ BytesRef spare;
+ while ((spare = tfit.next()) != null) {
+ if (spare.length + 4 >= buffer.length) {
+ buffer = ArrayUtil.grow(buffer, spare.length + 4);
}
output.reset(buffer);
- output.writeInt(FloatMagic.toSortable(tfit.freq()));
- output.writeBytes(tmp1.bytes, tmp1.offset, tmp1.length);
+ output.writeInt(encodeWeight(tfit.weight()));
+ output.writeBytes(spare.bytes, spare.offset, spare.length);
writer.write(buffer, 0, output.getPosition());
}
writer.close();
@@ -187,12 +189,13 @@ public class FSTCompletionLookup extends
reader = new Sort.ByteSequencesReader(tempSorted);
long line = 0;
int previousBucket = 0;
- float previousScore = 0;
+ int previousScore = 0;
ByteArrayDataInput input = new ByteArrayDataInput();
+ BytesRef tmp1 = new BytesRef();
BytesRef tmp2 = new BytesRef();
while (reader.read(tmp1)) {
input.reset(tmp1.bytes);
- float currentScore = FloatMagic.fromSortable(input.readInt());
+ int currentScore = input.readInt();
int bucket;
if (line > 0 && currentScore == previousScore) {
@@ -228,9 +231,17 @@ public class FSTCompletionLookup extends
tempSorted.delete();
}
}
+
+ /** weight -> cost */
+ private static int encodeWeight(long value) {
+ if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
+ throw new UnsupportedOperationException("cannot encode value: " + value);
+ }
+ return (int)value;
+ }
@Override
- public List<LookupResult> lookup(String key, boolean higherWeightsFirst, int num) {
+ public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
final List<Completion> completions;
if (higherWeightsFirst) {
completions = higherWeightsCompletion.lookup(key, num);
@@ -239,25 +250,18 @@ public class FSTCompletionLookup extends
}
final ArrayList<LookupResult> results = new ArrayList<LookupResult>(completions.size());
+ CharsRef spare = new CharsRef();
for (Completion c : completions) {
- results.add(new LookupResult(c.utf8.utf8ToString(), c.bucket));
+ spare.grow(c.utf8.length);
+ UnicodeUtil.UTF8toUTF16(c.utf8, spare);
+ results.add(new LookupResult(spare.toString(), c.bucket));
}
return results;
}
- @Override
- public boolean add(String key, Object value) {
- // Not supported.
- return false;
- }
-
- @Override
- public Float get(String key) {
- Integer bucket = normalCompletion.getBucket(key);
- if (bucket == null)
- return null;
- else
- return (float) normalCompletion.getBucket(key) / normalCompletion.getBucketCount();
+ public Object get(CharSequence key) {
+ final int bucket = normalCompletion.getBucket(key);
+ return bucket == -1 ? null : Long.valueOf(bucket);
}
/**
@@ -293,4 +297,30 @@ public class FSTCompletionLookup extends
normalCompletion.getFST().save(new File(storeDir, FILENAME));
return true;
}
+
+ @Override
+ public synchronized boolean store(OutputStream output) throws IOException {
+
+ if (this.normalCompletion == null)
+ return false;
+ try {
+ normalCompletion.getFST().save(new OutputStreamDataOutput(output));
+ } finally {
+ IOUtils.close(output);
+ }
+ return true;
+ }
+
+ @Override
+ public synchronized boolean load(InputStream input) throws IOException {
+ try {
+ this.higherWeightsCompletion = new FSTCompletion(new FST<Object>(
+ new InputStreamDataInput(input), NoOutputs.getSingleton()));
+ this.normalCompletion = new FSTCompletion(
+ higherWeightsCompletion.getFST(), false, exactMatchFirst);
+ } finally {
+ IOUtils.close(input);
+ }
+ return true;
+ }
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java Tue Mar 6 23:17:08 2012
@@ -17,29 +17,40 @@ package org.apache.lucene.search.suggest
* limitations under the License.
*/
-import java.util.*;
+import java.util.Comparator;
+import org.apache.lucene.search.suggest.BytesRefList;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
/**
* A {@link BytesRefSorter} that keeps all the entries in memory.
+ * @lucene.experimental
+ * @lucene.internal
*/
public final class InMemorySorter implements BytesRefSorter {
- // TODO: use a single byte[] to back up all entries?
- private final ArrayList<BytesRef> refs = new ArrayList<BytesRef>();
-
+ private final BytesRefList buffer = new BytesRefList();
private boolean closed = false;
+ private final Comparator<BytesRef> comparator;
+ public InMemorySorter(Comparator<BytesRef> comparator) {
+ this.comparator = comparator;
+ }
+
@Override
public void add(BytesRef utf8) {
if (closed) throw new IllegalStateException();
- refs.add(BytesRef.deepCopyOf(utf8));
+ buffer.append(utf8);
}
@Override
- public Iterator<BytesRef> iterator() {
+ public BytesRefIterator iterator() {
closed = true;
- Collections.sort(refs, BytesRef.getUTF8SortedAsUnicodeComparator());
- return Collections.unmodifiableCollection(refs).iterator();
+ return buffer.iterator(comparator);
+ }
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return comparator;
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java Tue Mar 6 23:17:08 2012
@@ -20,15 +20,10 @@ package org.apache.lucene.search.suggest
import java.io.*;
import java.util.*;
+import org.apache.lucene.search.suggest.BytesRefList;
import org.apache.lucene.util.*;
import org.apache.lucene.util.PriorityQueue;
-// TODO: the buffer is currently byte[][] which with very small arrays will terribly overallocate
-// memory (alignments) and make GC very happy.
-//
-// We could move it to a single byte[] + and use custom sorting, but we'd need to check if this
-// yields any improvement first.
-
/**
* On-disk sorting of byte arrays. Each byte array (entry) is composed of the following
* fields:
@@ -38,6 +33,8 @@ import org.apache.lucene.util.PriorityQu
* </ul>
*
* @see #sort(File, File)
+ * @lucene.experimental
+ * @lucene.internal
*/
public final class Sort {
public final static int MB = 1024 * 1024;
@@ -59,11 +56,6 @@ public final class Sort {
*/
public final static int MAX_TEMPFILES = 128;
- /**
- * Minimum slot buffer expansion.
- */
- private final static int MIN_EXPECTED_GROWTH = 1000;
-
/**
* A bit more descriptive unit for constructors.
*
@@ -112,21 +104,6 @@ public final class Sort {
}
/**
- * byte[] in unsigned byte order.
- */
- static final Comparator<byte[]> unsignedByteOrderComparator = new Comparator<byte[]>() {
- public int compare(byte[] left, byte[] right) {
- final int max = Math.min(left.length, right.length);
- for (int i = 0, j = 0; i < max; i++, j++) {
- int diff = (left[i] & 0xff) - (right[j] & 0xff);
- if (diff != 0)
- return diff;
- }
- return left.length - right.length;
- }
- };
-
- /**
* Sort info (debugging mostly).
*/
public class SortInfo {
@@ -149,14 +126,15 @@ public final class Sort {
}
}
- private final static byte [][] EMPTY = new byte [0][];
-
private final BufferSize ramBufferSize;
private final File tempDirectory;
-
- private byte [][] buffer = new byte [0][];
+
+ private final BytesRefList buffer = new BytesRefList();
private SortInfo sortInfo;
private int maxTempFiles;
+ private final Comparator<BytesRef> comparator;
+
+ public static final Comparator<BytesRef> DEFAULT_COMPARATOR = BytesRef.getUTF8SortedAsUnicodeComparator();
/**
* Default constructor.
@@ -165,13 +143,17 @@ public final class Sort {
* @see BufferSize#automatic()
*/
public Sort() throws IOException {
- this(BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
+ this(DEFAULT_COMPARATOR, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
+ }
+
+ public Sort(Comparator<BytesRef> comparator) throws IOException {
+ this(comparator, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
}
/**
* All-details constructor.
*/
- public Sort(BufferSize ramBufferSize, File tempDirectory, int maxTempfiles) {
+ public Sort(Comparator<BytesRef> comparator, BufferSize ramBufferSize, File tempDirectory, int maxTempfiles) {
if (ramBufferSize.bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE) {
throw new IllegalArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.bytes);
}
@@ -183,6 +165,7 @@ public final class Sort {
this.ramBufferSize = ramBufferSize;
this.tempDirectory = tempDirectory;
this.maxTempFiles = maxTempfiles;
+ this.comparator = comparator;
}
/**
@@ -283,23 +266,25 @@ public final class Sort {
/** Sort a single partition in-memory. */
protected File sortPartition(int len) throws IOException {
- byte [][] data = this.buffer;
+ BytesRefList data = this.buffer;
File tempFile = File.createTempFile("sort", "partition", tempDirectory);
long start = System.currentTimeMillis();
- Arrays.sort(data, 0, len, unsignedByteOrderComparator);
sortInfo.sortTime += (System.currentTimeMillis() - start);
- ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
+ final ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
+ BytesRef spare;
try {
- for (int i = 0; i < len; i++) {
- assert data[i].length <= Short.MAX_VALUE;
- out.write(data[i]);
+ BytesRefIterator iter = buffer.iterator(comparator);
+ while((spare = iter.next()) != null) {
+ assert spare.length <= Short.MAX_VALUE;
+ out.write(spare);
}
+
out.close();
// Clean up the buffer for the next partition.
- this.buffer = EMPTY;
+ data.clear();
return tempFile;
} finally {
IOUtils.close(out);
@@ -314,7 +299,7 @@ public final class Sort {
PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(merges.size()) {
protected boolean lessThan(FileAndTop a, FileAndTop b) {
- return a.current.compareTo(b.current) < 0;
+ return comparator.compare(a.current, b.current) < 0;
}
};
@@ -359,33 +344,18 @@ public final class Sort {
/** Read in a single partition of data */
int readPartition(ByteSequencesReader reader) throws IOException {
long start = System.currentTimeMillis();
-
- // We will be reallocating from scratch.
- Arrays.fill(this.buffer, null);
-
- int bytesLimit = this.ramBufferSize.bytes;
- byte [][] data = this.buffer;
- byte[] line;
- int linesRead = 0;
- while ((line = reader.read()) != null) {
- if (linesRead + 1 >= data.length) {
- data = Arrays.copyOf(data,
- ArrayUtil.oversize(linesRead + MIN_EXPECTED_GROWTH,
- RamUsageEstimator.NUM_BYTES_OBJECT_REF));
- }
- data[linesRead++] = line;
-
+ final BytesRef scratch = new BytesRef();
+ while ((scratch.bytes = reader.read()) != null) {
+ scratch.length = scratch.bytes.length;
+ buffer.append(scratch);
// Account for the created objects.
// (buffer slots do not account to buffer size.)
- bytesLimit -= line.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
- if (bytesLimit < 0) {
+ if (ramBufferSize.bytes < buffer.bytesUsed()) {
break;
}
}
- this.buffer = data;
-
sortInfo.readTime += (System.currentTimeMillis() - start);
- return linesRead;
+ return buffer.size();
}
static class FileAndTop {
@@ -515,5 +485,9 @@ public final class Sort {
((Closeable) is).close();
}
}
+ }
+
+ public Comparator<BytesRef> getComparator() {
+ return comparator;
}
}
\ No newline at end of file