You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by bu...@apache.org on 2010/07/22 21:34:52 UTC
svn commit: r966819 [4/20] - in /lucene/dev/branches/realtime_search: ./
lucene/ lucene/backwards/ lucene/contrib/ lucene/contrib/benchmark/conf/
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/
lucene/contrib/benchmark/src/j...
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java Thu Jul 22 19:34:35 2010
@@ -24,6 +24,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;
/**
@@ -299,7 +300,7 @@ public class PrecedenceQueryParser imple
/**
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getFieldQuery(String field, String queryText) throws ParseException {
+ protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
@@ -307,7 +308,7 @@ public class PrecedenceQueryParser imple
List<AttributeSource.State> list = new ArrayList<AttributeSource.State>();
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
- CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+ TermToBytesRefAttribute termAtt = source.addAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);
try {
@@ -328,17 +329,25 @@ public class PrecedenceQueryParser imple
return null;
else if (list.size() == 1) {
source.restoreState(list.get(0));
- return new TermQuery(new Term(field, termAtt.toString()));
+ BytesRef term = new BytesRef();
+ termAtt.toBytesRef(term);
+ return new TermQuery(new Term(field, term));
} else {
- if (severalTokensAtSamePosition) {
- if (positionCount == 1) {
+ if (severalTokensAtSamePosition || !quoted) {
+ if (positionCount == 1 || !quoted) {
// no phrase query:
- BooleanQuery q = new BooleanQuery();
+ BooleanQuery q = new BooleanQuery(positionCount == 1);
+
+ BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ?
+ BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
+
for (int i = 0; i < list.size(); i++) {
+ BytesRef term = new BytesRef();
source.restoreState(list.get(i));
+ termAtt.toBytesRef(term);
TermQuery currentQuery = new TermQuery(
- new Term(field, termAtt.toString()));
- q.add(currentQuery, BooleanClause.Occur.SHOULD);
+ new Term(field, term));
+ q.add(currentQuery, occur);
}
return q;
}
@@ -347,12 +356,14 @@ public class PrecedenceQueryParser imple
MultiPhraseQuery mpq = new MultiPhraseQuery();
List<Term> multiTerms = new ArrayList<Term>();
for (int i = 0; i < list.size(); i++) {
+ BytesRef term = new BytesRef();
source.restoreState(list.get(i));
if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add(multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
- multiTerms.add(new Term(field, termAtt.toString()));
+ termAtt.toBytesRef(term);
+ multiTerms.add(new Term(field, term));
}
mpq.add(multiTerms.toArray(new Term[0]));
return mpq;
@@ -362,8 +373,10 @@ public class PrecedenceQueryParser imple
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i = 0; i < list.size(); i++) {
+ BytesRef term = new BytesRef();
source.restoreState(list.get(i));
- q.add(new Term(field, termAtt.toString()));
+ termAtt.toBytesRef(term);
+ q.add(new Term(field, term));
}
return q;
}
@@ -371,7 +384,7 @@ public class PrecedenceQueryParser imple
}
/**
- * Base implementation delegates to {@link #getFieldQuery(String,String)}.
+ * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}.
* This method may be overridden, for example, to return
* a SpanNearQuery instead of a PhraseQuery.
*
@@ -379,7 +392,7 @@ public class PrecedenceQueryParser imple
*/
protected Query getFieldQuery(String field, String queryText, int slop)
throws ParseException {
- Query query = getFieldQuery(field, queryText);
+ Query query = getFieldQuery(field, queryText, true);
if (query instanceof PhraseQuery) {
((PhraseQuery) query).setSlop(slop);
@@ -847,7 +860,7 @@ public class PrecedenceQueryParser imple
}
q = getFuzzyQuery(field, termImage, fms);
} else {
- q = getFieldQuery(field, termImage);
+ q = getFieldQuery(field, termImage, false);
}
break;
case RANGEIN_START:
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj Thu Jul 22 19:34:35 2010
@@ -48,6 +48,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;
/**
@@ -323,7 +324,7 @@ public class PrecedenceQueryParser {
/**
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getFieldQuery(String field, String queryText) throws ParseException {
+ protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
@@ -331,7 +332,7 @@ public class PrecedenceQueryParser {
List<AttributeSource.State> list = new ArrayList<AttributeSource.State>();
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
- CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+ TermToBytesRefAttribute termAtt = source.addAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);
try {
@@ -352,17 +353,25 @@ public class PrecedenceQueryParser {
return null;
else if (list.size() == 1) {
source.restoreState(list.get(0));
- return new TermQuery(new Term(field, termAtt.toString()));
+ BytesRef term = new BytesRef();
+ termAtt.toBytesRef(term);
+ return new TermQuery(new Term(field, term));
} else {
- if (severalTokensAtSamePosition) {
- if (positionCount == 1) {
+ if (severalTokensAtSamePosition || !quoted) {
+ if (positionCount == 1 || !quoted) {
// no phrase query:
- BooleanQuery q = new BooleanQuery();
+ BooleanQuery q = new BooleanQuery(positionCount == 1);
+
+ BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ?
+ BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
+
for (int i = 0; i < list.size(); i++) {
+ BytesRef term = new BytesRef();
source.restoreState(list.get(i));
+ termAtt.toBytesRef(term);
TermQuery currentQuery = new TermQuery(
- new Term(field, termAtt.toString()));
- q.add(currentQuery, BooleanClause.Occur.SHOULD);
+ new Term(field, term));
+ q.add(currentQuery, occur);
}
return q;
}
@@ -371,12 +380,14 @@ public class PrecedenceQueryParser {
MultiPhraseQuery mpq = new MultiPhraseQuery();
List<Term> multiTerms = new ArrayList<Term>();
for (int i = 0; i < list.size(); i++) {
+ BytesRef term = new BytesRef();
source.restoreState(list.get(i));
if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add(multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
- multiTerms.add(new Term(field, termAtt.toString()));
+ termAtt.toBytesRef(term);
+ multiTerms.add(new Term(field, term));
}
mpq.add(multiTerms.toArray(new Term[0]));
return mpq;
@@ -386,8 +397,10 @@ public class PrecedenceQueryParser {
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i = 0; i < list.size(); i++) {
+ BytesRef term = new BytesRef();
source.restoreState(list.get(i));
- q.add(new Term(field, termAtt.toString()));
+ termAtt.toBytesRef(term);
+ q.add(new Term(field, term));
}
return q;
}
@@ -395,7 +408,7 @@ public class PrecedenceQueryParser {
}
/**
- * Base implementation delegates to {@link #getFieldQuery(String,String)}.
+ * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}.
* This method may be overridden, for example, to return
* a SpanNearQuery instead of a PhraseQuery.
*
@@ -403,7 +416,7 @@ public class PrecedenceQueryParser {
*/
protected Query getFieldQuery(String field, String queryText, int slop)
throws ParseException {
- Query query = getFieldQuery(field, queryText);
+ Query query = getFieldQuery(field, queryText, true);
if (query instanceof PhraseQuery) {
((PhraseQuery) query).setSlop(slop);
@@ -830,7 +843,7 @@ Query Term(String field) : {
}
q = getFuzzyQuery(field, termImage, fms);
} else {
- q = getFieldQuery(field, termImage);
+ q = getFieldQuery(field, termImage, false);
}
}
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java Thu Jul 22 19:34:35 2010
@@ -22,6 +22,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;
/** Token Manager. */
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java Thu Jul 22 19:34:35 2010
@@ -448,10 +448,16 @@ public class QueryParserWrapper {
throw new UnsupportedOperationException();
}
+ /** @deprecated Use {@link #getFieldQuery(String, String, boolean)} instead */
+ @Deprecated
+ protected Query getFieldQuery(String field, String queryText) throws ParseException {
+ return getFieldQuery(field, queryText, true);
+ }
+
/**
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getFieldQuery(String field, String queryText)
+ protected Query getFieldQuery(String field, String queryText, boolean quoted)
throws ParseException {
throw new UnsupportedOperationException();
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java Thu Jul 22 19:34:35 2010
@@ -36,6 +36,7 @@ import org.apache.lucene.queryParser.cor
import org.apache.lucene.queryParser.core.nodes.NoTokenFoundQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
+import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryParser.core.nodes.TextableQueryNode;
import org.apache.lucene.queryParser.core.nodes.TokenizedPhraseQueryNode;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
@@ -187,8 +188,8 @@ public class AnalyzerQueryNodeProcessor
return fieldNode;
- } else if (severalTokensAtSamePosition) {
- if (positionCount == 1) {
+ } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
+ if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
// no phrase query:
LinkedList<QueryNode> children = new LinkedList<QueryNode>();
@@ -206,9 +207,11 @@ public class AnalyzerQueryNodeProcessor
children.add(new FieldQueryNode(field, term, -1, -1));
}
-
- return new GroupQueryNode(
+ if (positionCount == 1)
+ return new GroupQueryNode(
new StandardBooleanQueryNode(children, true));
+ else
+ return new StandardBooleanQueryNode(children, false);
} else {
// phrase query:
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java Thu Jul 22 19:34:35 2010
@@ -53,12 +53,10 @@ Operations:
*/
import java.io.IOException;
-import java.util.HashMap;
import java.util.Iterator;
-
+import java.util.HashMap;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
@@ -84,10 +82,6 @@ public class SpanNearClauseFactory {
public BasicQueryFactory getBasicQueryFactory() {return qf;}
- public TermEnum getTermEnum(String termText) throws IOException {
- return getIndexReader().terms(new Term(getFieldName(), termText));
- }
-
public int size() {return weightBySpanQuery.size();}
public void clear() {weightBySpanQuery.clear();}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java Thu Jul 22 19:34:35 2010
@@ -23,9 +23,12 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.DateTools;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PhraseQuery;
@@ -280,6 +283,90 @@ public class TestPrecedenceQueryParser e
assertQueryEquals("term term1 term2", a, "term term1 term2");
}
+ //individual CJK chars as terms, like StandardAnalyzer
+ private class SimpleCJKTokenizer extends Tokenizer {
+ private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+ public SimpleCJKTokenizer(Reader input) {
+ super(input);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ int ch = input.read();
+ if (ch < 0)
+ return false;
+ clearAttributes();
+ termAtt.setEmpty().append((char) ch);
+ return true;
+ }
+ }
+
+ private class SimpleCJKAnalyzer extends Analyzer {
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new SimpleCJKTokenizer(reader);
+ }
+ }
+
+ public void testCJKTerm() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ BooleanQuery expected = new BooleanQuery();
+    expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+    expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+
+    assertEquals(expected, getQuery("中国", analyzer));
+ }
+
+ public void testCJKBoostedTerm() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ BooleanQuery expected = new BooleanQuery();
+ expected.setBoost(0.5f);
+    expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+    expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+
+    assertEquals(expected, getQuery("中国^0.5", analyzer));
+ }
+
+ public void testCJKPhrase() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ PhraseQuery expected = new PhraseQuery();
+    expected.add(new Term("field", "中"));
+    expected.add(new Term("field", "国"));
+
+    assertEquals(expected, getQuery("\"中国\"", analyzer));
+ }
+
+ public void testCJKBoostedPhrase() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ PhraseQuery expected = new PhraseQuery();
+ expected.setBoost(0.5f);
+    expected.add(new Term("field", "中"));
+    expected.add(new Term("field", "国"));
+
+    assertEquals(expected, getQuery("\"中国\"^0.5", analyzer));
+ }
+
+ public void testCJKSloppyPhrase() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ PhraseQuery expected = new PhraseQuery();
+ expected.setSlop(3);
+    expected.add(new Term("field", "中"));
+    expected.add(new Term("field", "国"));
+
+    assertEquals(expected, getQuery("\"中国\"~3", analyzer));
+ }
+
// failing tests disabled since PrecedenceQueryParser
// is currently unmaintained
public void _testWildcard() throws Exception {
@@ -353,11 +440,11 @@ public class TestPrecedenceQueryParser e
assertQueryEquals("term -stop term", qpAnalyzer, "term term");
assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
assertQueryEquals("term phrase term", qpAnalyzer,
- "term \"phrase1 phrase2\" term");
+ "term (phrase1 phrase2) term");
// note the parens in this next assertion differ from the original
// QueryParser behavior
assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
- "(+term -\"phrase1 phrase2\") term");
+ "(+term -(phrase1 phrase2)) term");
assertQueryEquals("stop", qpAnalyzer, "");
assertQueryEquals("stop OR stop AND stop", qpAnalyzer, "");
assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery);
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Thu Jul 22 19:34:35 2010
@@ -37,6 +37,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -57,6 +58,7 @@ import org.apache.lucene.queryParser.cor
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorPipeline;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
+import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -331,6 +333,90 @@ public class TestQPHelper extends Locali
assertQueryEqualsAllowLeadingWildcard("??\u3000??\u3000??", null, "??\u0020??\u0020??");
}
+ //individual CJK chars as terms, like StandardAnalyzer
+ private class SimpleCJKTokenizer extends Tokenizer {
+ private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+ public SimpleCJKTokenizer(Reader input) {
+ super(input);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ int ch = input.read();
+ if (ch < 0)
+ return false;
+ clearAttributes();
+ termAtt.setEmpty().append((char) ch);
+ return true;
+ }
+ }
+
+ private class SimpleCJKAnalyzer extends Analyzer {
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new SimpleCJKTokenizer(reader);
+ }
+ }
+
+ public void testCJKTerm() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ BooleanQuery expected = new BooleanQuery();
+    expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+    expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+
+    assertEquals(expected, getQuery("中国", analyzer));
+ }
+
+ public void testCJKBoostedTerm() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ BooleanQuery expected = new BooleanQuery();
+ expected.setBoost(0.5f);
+    expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+    expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+
+    assertEquals(expected, getQuery("中国^0.5", analyzer));
+ }
+
+ public void testCJKPhrase() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ PhraseQuery expected = new PhraseQuery();
+    expected.add(new Term("field", "中"));
+    expected.add(new Term("field", "国"));
+
+    assertEquals(expected, getQuery("\"中国\"", analyzer));
+ }
+
+ public void testCJKBoostedPhrase() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ PhraseQuery expected = new PhraseQuery();
+ expected.setBoost(0.5f);
+    expected.add(new Term("field", "中"));
+    expected.add(new Term("field", "国"));
+
+    assertEquals(expected, getQuery("\"中国\"^0.5", analyzer));
+ }
+
+ public void testCJKSloppyPhrase() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ PhraseQuery expected = new PhraseQuery();
+ expected.setSlop(3);
+    expected.add(new Term("field", "中"));
+    expected.add(new Term("field", "国"));
+
+    assertEquals(expected, getQuery("\"中国\"~3", analyzer));
+ }
+
public void testSimple() throws Exception {
assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2");
assertQueryEquals("term term term", null, "term term term");
@@ -529,10 +615,10 @@ public class TestQPHelper extends Locali
assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
assertQueryEquals("term phrase term", qpAnalyzer,
- "term \"phrase1 phrase2\" term");
+ "term phrase1 phrase2 term");
assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
- "+term -\"phrase1 phrase2\" term");
+ "+term -(phrase1 phrase2) term");
assertQueryEquals("stop^3", qpAnalyzer, "");
assertQueryEquals("stop", qpAnalyzer, "");
Modified: lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java Thu Jul 22 19:34:35 2010
@@ -35,6 +35,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
@@ -53,6 +54,7 @@ import org.apache.lucene.queryParser.cor
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorPipeline;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
import org.apache.lucene.queryParser.standard.processors.WildcardQueryNodeProcessor;
+import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -323,6 +325,90 @@ public class TestQueryParserWrapper exte
assertQueryEqualsAllowLeadingWildcard("??\u3000??\u3000??", null, "??\u0020??\u0020??");
}
+ //individual CJK chars as terms, like StandardAnalyzer
+ private class SimpleCJKTokenizer extends Tokenizer {
+ private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+ public SimpleCJKTokenizer(Reader input) {
+ super(input);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ int ch = input.read();
+ if (ch < 0)
+ return false;
+ clearAttributes();
+ termAtt.setEmpty().append((char) ch);
+ return true;
+ }
+ }
+
+ private class SimpleCJKAnalyzer extends Analyzer {
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new SimpleCJKTokenizer(reader);
+ }
+ }
+
+ public void testCJKTerm() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ BooleanQuery expected = new BooleanQuery();
+    expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+    expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+
+    assertEquals(expected, getQuery("中国", analyzer));
+ }
+
+ public void testCJKBoostedTerm() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ BooleanQuery expected = new BooleanQuery();
+ expected.setBoost(0.5f);
+    expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+    expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+
+    assertEquals(expected, getQuery("中国^0.5", analyzer));
+ }
+
+ public void testCJKPhrase() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ PhraseQuery expected = new PhraseQuery();
+    expected.add(new Term("field", "中"));
+    expected.add(new Term("field", "国"));
+
+    assertEquals(expected, getQuery("\"中国\"", analyzer));
+ }
+
+ public void testCJKBoostedPhrase() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ PhraseQuery expected = new PhraseQuery();
+ expected.setBoost(0.5f);
+    expected.add(new Term("field", "中"));
+    expected.add(new Term("field", "国"));
+
+    assertEquals(expected, getQuery("\"中国\"^0.5", analyzer));
+ }
+
+ public void testCJKSloppyPhrase() throws Exception {
+ // individual CJK chars as terms
+ SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();
+
+ PhraseQuery expected = new PhraseQuery();
+ expected.setSlop(3);
+    expected.add(new Term("field", "中"));
+    expected.add(new Term("field", "国"));
+
+    assertEquals(expected, getQuery("\"中国\"~3", analyzer));
+ }
+
public void testSimple() throws Exception {
assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2");
assertQueryEquals("term term term", null, "term term term");
@@ -528,10 +614,10 @@ public class TestQueryParserWrapper exte
assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
assertQueryEquals("term phrase term", qpAnalyzer,
- "term \"phrase1 phrase2\" term");
+ "term phrase1 phrase2 term");
assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
- "+term -\"phrase1 phrase2\" term");
+ "+term -(phrase1 phrase2) term");
assertQueryEquals("stop^3", qpAnalyzer, "");
assertQueryEquals("stop", qpAnalyzer, "");
Modified: lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java Thu Jul 22 19:34:35 2010
@@ -25,9 +25,9 @@ import org.apache.lucene.search.FieldCac
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.FilteredDocIdSet;
+import org.apache.lucene.spatial.DistanceUtils;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.spatial.tier.DistanceFilter;
-import org.apache.lucene.spatial.tier.DistanceUtils;
/** <p><font color="red"><b>NOTE:</b> This API is still in
@@ -90,7 +90,7 @@ public class GeoHashDistanceFilter exten
if (cachedDistance != null) {
d = cachedDistance.doubleValue();
} else {
- d = DistanceUtils.getInstance().getDistanceMi(lat, lng, x, y);
+ d = DistanceUtils.getDistanceMi(lat, lng, x, y);
distanceLookupCache.put(geoHash, d);
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geometry/DistanceUnits.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geometry/DistanceUnits.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geometry/DistanceUnits.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geometry/DistanceUnits.java Thu Jul 22 19:34:35 2010
@@ -55,11 +55,11 @@ public enum DistanceUnits {
* @throws IllegalArgumentException if no DistanceUnit which represents the given unit is found
*/
public static DistanceUnits findDistanceUnit(String unit) {
- if (MILES.getUnit().equals(unit)) {
+ if (MILES.getUnit().equalsIgnoreCase(unit) || unit.equalsIgnoreCase("mi")) {
return MILES;
}
- if (KILOMETERS.getUnit().equals(unit)) {
+ if (KILOMETERS.getUnit().equalsIgnoreCase(unit)) {
return KILOMETERS;
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceHandler.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceHandler.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceHandler.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@
package org.apache.lucene.spatial.tier;
+import org.apache.lucene.spatial.DistanceUtils;
+
import java.util.HashMap;
import java.util.Map;
@@ -68,7 +70,7 @@ public class DistanceHandler {
// check to see if we have distances
// if not calculate the distance
if(distances == null){
- return DistanceUtils.getInstance().getDistanceMi(centerLat, centerLng, lat, lng);
+ return DistanceUtils.getDistanceMi(centerLat, centerLng, lat, lng);
}
// check to see if the doc id has a cached distance
@@ -93,7 +95,7 @@ public class DistanceHandler {
}
//all else fails calculate the distances
- return DistanceUtils.getInstance().getDistanceMi(centerLat, centerLng, lat, lng);
+ return DistanceUtils.getDistanceMi(centerLat, centerLng, lat, lng);
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java Thu Jul 22 19:34:35 2010
@@ -23,6 +23,7 @@ import org.apache.lucene.search.Filtered
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.spatial.DistanceUtils;
/**
@@ -88,7 +89,7 @@ public class LatLongDistanceFilter exten
if (cachedDistance != null){
d = cachedDistance.doubleValue();
} else {
- d = DistanceUtils.getInstance().getDistanceMi(lat, lng, x, y);
+ d = DistanceUtils.getDistanceMi(lat, lng, x, y);
distanceLookupCache.put(ck, d);
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/projections/SinusoidalProjector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/projections/SinusoidalProjector.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/projections/SinusoidalProjector.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/projections/SinusoidalProjector.java Thu Jul 22 19:34:35 2010
@@ -20,18 +20,23 @@ package org.apache.lucene.spatial.tier.p
/**
* Based on Sinusoidal Projections
* Project a latitude / longitude on a 2D cartesian map
+ * <p/>
+ * THIS PROJECTION IS WRONG, but it's not going to be fixed b/c it will break a lot of existing tests, plus we are deprecating
+ * most of the existing spatial and replacing with a more reliable approach.
*
* <p><font color="red"><b>NOTE:</b> This API is still in
* flux and might change in incompatible ways in the next
* release.</font>
+ *
+ * @deprecated Until we can put in place proper tests and a proper fix.
*/
public class SinusoidalProjector implements IProjector {
-
+
public String coordsAsString(double latitude, double longitude) {
return null;
}
-
+
public double[] coords(double latitude, double longitude) {
double rlat = Math.toRadians(latitude);
double rlong = Math.toRadians(longitude);
@@ -42,3 +47,39 @@ public class SinusoidalProjector impleme
}
}
+
+/*
+This whole file should really be:*/
+
+/**
+ * Based on Sinusoidal Projections
+ * Project a latitude / longitude on a 2D cartesian map using the Prime Meridian as the "central meridian"
+ *
+ * See http://en.wikipedia.org/wiki/Sinusoidal_projection
+ *
+ * <p><font color="red"><b>NOTE:</b> This API is still in
+ * flux and might change in incompatible ways in the next
+ * release.</font>
+ */
+/*
+public class SinusoidalProjector implements IProjector {
+
+
+ public String coordsAsString(double latitude, double longitude) {
+ double [] coords = coords(latitude, longitude);
+ return coords[0] + "," + coords[1];
+ }
+
+ public double[] coords(double latitude, double longitude) {
+ double rlat = latitude * DistanceUtils.DEGREES_TO_RADIANS;
+ double rlong = longitude * DistanceUtils.DEGREES_TO_RADIANS;
+ double x = rlong * Math.cos(rlat);
+ return new double[]{x, rlat};
+
+ }
+
+}
+*/
+
+
+
Modified: lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/DistanceCheck.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/DistanceCheck.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/DistanceCheck.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/DistanceCheck.java Thu Jul 22 19:34:35 2010
@@ -16,6 +16,8 @@
*/
package org.apache.lucene.spatial.tier;
+import org.apache.lucene.spatial.DistanceUtils;
+
import java.text.DecimalFormat;
@@ -31,7 +33,7 @@ public class DistanceCheck {
double long2 = 0;
for (int i =0; i < 90; i++){
- double dis = DistanceUtils.getInstance().getDistanceMi(lat1, long1, lat2, long2);
+ double dis = DistanceUtils.getDistanceMi(lat1, long1, lat2, long2);
lat1 +=1;
lat2 = lat1 + 0.001;
Modified: lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java Thu Jul 22 19:34:35 2010
@@ -40,6 +40,7 @@ import org.apache.lucene.search.function
import org.apache.lucene.search.function.CustomScoreProvider;
import org.apache.lucene.search.function.FieldScoreQuery;
import org.apache.lucene.search.function.FieldScoreQuery.Type;
+import org.apache.lucene.spatial.DistanceUtils;
import org.apache.lucene.spatial.geohash.GeoHashUtils;
import org.apache.lucene.spatial.geometry.DistanceUnits;
import org.apache.lucene.spatial.geometry.FloatLatLng;
@@ -278,8 +279,8 @@ public class TestCartesian extends Lucen
double rsLng = Double.parseDouble(d.get(lngField));
Double geo_distance = distances.get(scoreDocs[i].doc);
- double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);
- double llm = DistanceUtils.getInstance().getLLMDistance(lat, lng, rsLat, rsLng);
+ double distance = DistanceUtils.getDistanceMi(lat, lng, rsLat, rsLng);
+ double llm = DistanceUtils.getLLMDistance(lat, lng, rsLat, rsLng);
if (VERBOSE) System.out.println("Name: "+ name +", Distance "+ distance); //(res, ortho, harvesine):"+ distance +" |"+ geo_distance +"|"+ llm +" | score "+ hits.score(i));
assertTrue(Math.abs((distance - llm)) < 1);
assertTrue((distance < miles ));
@@ -372,8 +373,8 @@ public class TestCartesian extends Lucen
double rsLng = Double.parseDouble(d.get(lngField));
Double geo_distance = distances.get(scoreDocs[i].doc);
- double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);
- double llm = DistanceUtils.getInstance().getLLMDistance(lat, lng, rsLat, rsLng);
+ double distance = DistanceUtils.getDistanceMi(lat, lng, rsLat, rsLng);
+ double llm = DistanceUtils.getLLMDistance(lat, lng, rsLat, rsLng);
if (VERBOSE) System.out.println("Name: "+ name +", Distance "+ distance); //(res, ortho, harvesine):"+ distance +" |"+ geo_distance +"|"+ llm +" | score "+ hits.score(i));
assertTrue(Math.abs((distance - llm)) < 1);
if (VERBOSE) System.out.println("checking limit "+ distance + " < " + miles);
@@ -467,8 +468,8 @@ public class TestCartesian extends Lucen
double rsLng = Double.parseDouble(d.get(lngField));
Double geo_distance = distances.get(scoreDocs[i].doc);
- double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);
- double llm = DistanceUtils.getInstance().getLLMDistance(lat, lng, rsLat, rsLng);
+ double distance = DistanceUtils.getDistanceMi(lat, lng, rsLat, rsLng);
+ double llm = DistanceUtils.getLLMDistance(lat, lng, rsLat, rsLng);
if (VERBOSE) System.out.println("Name: "+ name +", Distance "+ distance); //(res, ortho, harvesine):"+ distance +" |"+ geo_distance +"|"+ llm +" | score "+ hits.score(i));
assertTrue(Math.abs((distance - llm)) < 1);
assertTrue((distance < miles ));
@@ -561,8 +562,8 @@ public class TestCartesian extends Lucen
double rsLng = Double.parseDouble(d.get(lngField));
Double geo_distance = distances.get(scoreDocs[i].doc);
- double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);
- double llm = DistanceUtils.getInstance().getLLMDistance(lat, lng, rsLat, rsLng);
+ double distance = DistanceUtils.getDistanceMi(lat, lng, rsLat, rsLng);
+ double llm = DistanceUtils.getLLMDistance(lat, lng, rsLat, rsLng);
if (VERBOSE) System.out.println("Name: "+ name +", Distance (res, ortho, harvesine):"+ distance +" |"+ geo_distance +"|"+ llm +" | score "+ scoreDocs[i].score);
assertTrue(Math.abs((distance - llm)) < 1);
assertTrue((distance < miles ));
Modified: lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java Thu Jul 22 19:34:35 2010
@@ -6,11 +6,12 @@ import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.ParserException;
import org.w3c.dom.Element;
@@ -56,10 +57,12 @@ public class SpanOrTermsBuilder extends
{
ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
- CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
while (ts.incrementToken()) {
- SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.toString()));
+ BytesRef term = new BytesRef();
+ termAtt.toBytesRef(term);
+ SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, term));
clausesList.add(stq);
}
SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
Modified: lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java Thu Jul 22 19:34:35 2010
@@ -5,10 +5,11 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.TermsFilter;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.FilterBuilder;
import org.apache.lucene.xmlparser.ParserException;
@@ -57,19 +58,21 @@ public class TermsFilterBuilder implemen
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
- CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
try
{
Term term = null;
while (ts.incrementToken()) {
+ BytesRef bytes = new BytesRef();
+ termAtt.toBytesRef(bytes);
if (term == null)
{
- term = new Term(fieldName, termAtt.toString());
+ term = new Term(fieldName, bytes);
} else
{
// create from previous to save fieldName.intern overhead
- term = term.createTerm(termAtt.toString());
+ term = term.createTerm(bytes);
}
tf.addTerm(term);
}
Modified: lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java Thu Jul 22 19:34:35 2010
@@ -5,12 +5,13 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.ParserException;
import org.apache.lucene.xmlparser.QueryBuilder;
@@ -57,16 +58,18 @@ public class TermsQueryBuilder implement
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try
{
- CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null;
while (ts.incrementToken()) {
+ BytesRef bytes = new BytesRef();
+ termAtt.toBytesRef(bytes);
if (term == null)
{
- term = new Term(fieldName, termAtt.toString());
+ term = new Term(fieldName, bytes);
} else
{
// create from previous to save fieldName.intern overhead
- term = term.createTerm(termAtt.toString());
+ term = term.createTerm(bytes);
}
bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
}
Modified: lucene/dev/branches/realtime_search/lucene/docs/demo2.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/docs/demo2.html?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/docs/demo2.html (original)
+++ lucene/dev/branches/realtime_search/lucene/docs/demo2.html Thu Jul 22 19:34:35 2010
@@ -316,7 +316,7 @@ stop words and characters from the index
words such as articles (a, an, the, etc.) and other strings that may have less value for searching
(e.g. <b>'s</b>) . It should be noted that there are different rules for every language, and you
should use the proper analyzer for each. Lucene currently provides Analyzers for a number of
-different languages (see the <span class="codefrag">*Analyzer.java</span> sources under <a href="http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/">contrib/analyzers/src/java/org/apache/lucene/analysis</a>).
+different languages (see the <span class="codefrag">*Analyzer.java</span> sources under <a href="http://svn.apache.org/repos/asf/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/">modules/analysis/common/src/java/org/apache/lucene/analysis</a>).
</p>
<p>
Looking further down in the file, you should see the <span class="codefrag">indexDocs()</span> code. This recursive
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/analysis/Token.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/analysis/Token.java Thu Jul 22 19:34:35 2010
@@ -24,7 +24,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
-import org.apache.lucene.index.TermPositions; // for javadoc
+import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeImpl;
@@ -45,8 +45,8 @@ import org.apache.lucene.util.AttributeI
with type "eos". The default token type is "word".
<p>
A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
- length byte array. Use {@link TermPositions#getPayloadLength()} and
- {@link TermPositions#getPayload(byte[], int)} to retrieve the payloads from the index.
+ length byte array. Use {@link DocsAndPositionsEnum#getPayloadLength()} and
+ {@link DocsAndPositionsEnum#getPayload(byte[], int)} to retrieve the payloads from the index.
<br><br>
Propchange: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1,4 +1,5 @@
-/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java:943137,949730
+/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java:943137,949730,957490,960490,961612
+/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java:953476-966816
/lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/Tokenizer.java:824912-931101
/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/analysis/Tokenizer.java:909334,948516
/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java:924483-924731,924781,925176-925462
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/analysis/package.html?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/analysis/package.html Thu Jul 22 19:34:35 2010
@@ -105,7 +105,7 @@ There are many post tokenization steps t
<li>{@link org.apache.lucene.analysis.PerFieldAnalyzerWrapper} – Most Analyzers perform the same operation on all
{@link org.apache.lucene.document.Field}s. The PerFieldAnalyzerWrapper can be used to associate a different Analyzer with different
{@link org.apache.lucene.document.Field}s.</li>
- <li>The contrib/analyzers library located at the root of the Lucene distribution has a number of different Analyzer implementations to solve a variety
+ <li>The modules/analysis library located at the root of the Lucene distribution has a number of different Analyzer implementations to solve a variety
of different problems related to searching. Many of the Analyzers are designed to analyze non-English languages.</li>
<li>The contrib/snowball library
located at the root of the Lucene distribution has Analyzer and TokenFilter
@@ -168,7 +168,7 @@ There are many post tokenization steps t
<h2>Implementing your own Analyzer</h2>
<p>Creating your own Analyzer is straightforward. It usually involves either wrapping an existing Tokenizer and set of TokenFilters to create a new Analyzer
or creating both the Analyzer and a Tokenizer or TokenFilter. Before pursuing this approach, you may find it worthwhile
-to explore the contrib/analyzers library and/or ask on the java-user@lucene.apache.org mailing list first to see if what you need already exists.
+to explore the modules/analysis library and/or ask on the java-user@lucene.apache.org mailing list first to see if what you need already exists.
If you are still committed to creating your own Analyzer or TokenStream derivation (Tokenizer or TokenFilter) have a look at
the source code of any one of the many samples located in this package.
</p>
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java Thu Jul 22 19:34:35 2010
@@ -63,5 +63,14 @@ public enum FieldSelectorResult {
SIZE,
/** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded */
- SIZE_AND_BREAK
+ SIZE_AND_BREAK,
+
+ /**
+ * Lazily load this {@link Field}, but do not cache the result. This means the {@link Field} is valid, but it may not actually contain its data until
+ * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
+ * return a valid instance of a {@link Fieldable}.
+ *<p/>
+ * {@link Document#add(Fieldable)} should be called by the Reader.
+ */
+ LATENT
}
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/CheckIndex.java Thu Jul 22 19:34:35 2010
@@ -24,6 +24,7 @@ import org.apache.lucene.store.IndexInpu
import org.apache.lucene.document.AbstractField; // for javadocs
import org.apache.lucene.document.Document;
import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -32,7 +33,7 @@ import java.io.PrintStream;
import java.io.IOException;
import java.io.File;
import java.util.Collection;
-
+import java.util.Comparator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
@@ -343,12 +344,15 @@ public class CheckIndex {
String sFormat = "";
boolean skip = false;
- if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
+ if (format == DefaultSegmentInfosWriter.FORMAT_DIAGNOSTICS) {
sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
- else if (format == SegmentInfos.FORMAT_4_0)
- sFormat = "FORMAT_FLEX_POSTINGS [Lucene 4.0]";
- else if (format < SegmentInfos.CURRENT_FORMAT) {
- sFormat = "int=" + format + " [newer version of Lucene than this tool]";
+ } else if (format == DefaultSegmentInfosWriter.FORMAT_4_0) {
+ sFormat = "FORMAT_4_0 [Lucene 4.0]";
+ } else if (format < DefaultSegmentInfosWriter.FORMAT_CURRENT) {
+ sFormat = "int=" + format + " [newer version of Lucene than this tool supports]";
+ skip = true;
+ } else if (format > DefaultSegmentInfosWriter.FORMAT_MINIMUM) {
+ sFormat = "int=" + format + " [older version of Lucene than this tool supports]";
skip = true;
}
@@ -596,6 +600,10 @@ public class CheckIndex {
boolean hasOrd = true;
final long termCountStart = status.termCount;
+ BytesRef lastTerm = null;
+
+ Comparator<BytesRef> termComp = terms.getComparator();
+
while(true) {
final BytesRef term = terms.next();
@@ -603,6 +611,17 @@ public class CheckIndex {
break;
}
+ // make sure terms arrive in order according to
+ // the comp
+ if (lastTerm == null) {
+ lastTerm = new BytesRef(term);
+ } else {
+ if (termComp.compare(lastTerm, term) >= 0) {
+ throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term);
+ }
+ lastTerm.copy(term);
+ }
+
final int docFreq = terms.docFreq();
status.totFreq += docFreq;
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Thu Jul 22 19:34:35 2010
@@ -457,7 +457,6 @@ public class ConcurrentMergeScheduler ex
/** Called when an exception is hit in a background merge
* thread */
protected void handleMergeException(Throwable exc) {
- System.out.println("HANDLE " + exc);
try {
// When an exception is hit during merge, IndexWriter
// removes any partial files and then allows another
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Thu Jul 22 19:34:35 2010
@@ -71,6 +71,11 @@ class DirectoryReader extends IndexReade
private int numDocs = -1;
private boolean hasDeletions = false;
+ // Max version in index as of when we opened; this can be
+ // > our current segmentInfos version in case we were
+ // opened on a past IndexCommit:
+ private long maxIndexVersion;
+
// static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly,
// final int termInfosIndexDivisor) throws CorruptIndexException, IOException {
// return open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor, null);
@@ -359,6 +364,10 @@ class DirectoryReader extends IndexReade
}
}
starts[subReaders.length] = maxDoc;
+
+ if (!readOnly) {
+ maxIndexVersion = SegmentInfos.readCurrentVersion(directory, codecs);
+ }
}
@Override
@@ -686,28 +695,6 @@ class DirectoryReader extends IndexReade
}
@Override
- public TermEnum terms() throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].terms();
- } else {
- return new MultiTermEnum(this, subReaders, starts, null);
- }
- }
-
- @Override
- public TermEnum terms(Term term) throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].terms(term);
- } else {
- return new MultiTermEnum(this, subReaders, starts, term);
- }
- }
-
- @Override
public int docFreq(Term t) throws IOException {
ensureOpen();
int total = 0; // sum freqs in segments
@@ -727,43 +714,10 @@ class DirectoryReader extends IndexReade
}
@Override
- public TermDocs termDocs() throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].termDocs();
- } else {
- return new MultiTermDocs(this, subReaders, starts);
- }
- }
-
- @Override
- public TermDocs termDocs(Term term) throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].termDocs(term);
- } else {
- return super.termDocs(term);
- }
- }
-
- @Override
public Fields fields() throws IOException {
throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)");
}
- @Override
- public TermPositions termPositions() throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].termPositions();
- } else {
- return new MultiTermPositions(this, subReaders, starts);
- }
- }
-
/**
* Tries to acquire the WriteLock on this directory. this method is only valid if this IndexReader is directory
* owner.
@@ -798,7 +752,7 @@ class DirectoryReader extends IndexReade
// we have to check whether index has changed since this reader was opened.
// if so, this reader is no longer valid for deletion
- if (SegmentInfos.readCurrentVersion(directory, codecs) > segmentInfos.getVersion()) {
+ if (SegmentInfos.readCurrentVersion(directory, codecs) > maxIndexVersion) {
stale = true;
this.writeLock.release();
this.writeLock = null;
@@ -830,6 +784,7 @@ class DirectoryReader extends IndexReade
IndexFileDeleter deleter = new IndexFileDeleter(directory,
deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
segmentInfos, null, null, codecs);
+ segmentInfos.updateGeneration(deleter.getLastSegmentInfos());
// Checkpoint the state we are about to change, in
// case we have to roll back:
@@ -868,6 +823,8 @@ class DirectoryReader extends IndexReade
deleter.checkpoint(segmentInfos, true);
deleter.close();
+ maxIndexVersion = segmentInfos.getVersion();
+
if (writeLock != null) {
writeLock.release(); // release write lock
writeLock = null;
@@ -1122,280 +1079,4 @@ class DirectoryReader extends IndexReade
throw new UnsupportedOperationException("This IndexCommit does not support deletions");
}
}
-
- // @deprecated This is pre-flex API
- // Exposes pre-flex API by doing on-the-fly merging
- // pre-flex API to each segment
- static class MultiTermEnum extends TermEnum {
- IndexReader topReader; // used for matching TermEnum to TermDocs
- private LegacySegmentMergeQueue queue;
-
- private Term term;
- private int docFreq;
- final LegacySegmentMergeInfo[] matchingSegments; // null terminated array of matching segments
-
- public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t)
- throws IOException {
- this.topReader = topReader;
- queue = new LegacySegmentMergeQueue(readers.length);
- matchingSegments = new LegacySegmentMergeInfo[readers.length+1];
- for (int i = 0; i < readers.length; i++) {
- IndexReader reader = readers[i];
- TermEnum termEnum;
-
- if (t != null) {
- termEnum = reader.terms(t);
- } else {
- termEnum = reader.terms();
- }
-
- LegacySegmentMergeInfo smi = new LegacySegmentMergeInfo(starts[i], termEnum, reader);
- smi.ord = i;
- if (t == null ? smi.next() : termEnum.term() != null)
- queue.add(smi); // initialize queue
- else
- smi.close();
- }
-
- if (t != null && queue.size() > 0) {
- next();
- }
- }
-
- @Override
- public boolean next() throws IOException {
- for (int i=0; i<matchingSegments.length; i++) {
- LegacySegmentMergeInfo smi = matchingSegments[i];
- if (smi==null) break;
- if (smi.next())
- queue.add(smi);
- else
- smi.close(); // done with segment
- }
-
- int numMatchingSegments = 0;
- matchingSegments[0] = null;
-
- LegacySegmentMergeInfo top = queue.top();
-
- if (top == null) {
- term = null;
- return false;
- }
-
- term = top.term;
- docFreq = 0;
-
- while (top != null && term.compareTo(top.term) == 0) {
- matchingSegments[numMatchingSegments++] = top;
- queue.pop();
- docFreq += top.termEnum.docFreq(); // increment freq
- top = queue.top();
- }
-
- matchingSegments[numMatchingSegments] = null;
- return true;
- }
-
- @Override
- public Term term() {
- return term;
- }
-
- @Override
- public int docFreq() {
- return docFreq;
- }
-
- @Override
- public void close() throws IOException {
- queue.close();
- }
- }
-
- // @deprecated This is pre-flex API
- // Exposes pre-flex API by doing on-the-fly merging
- // pre-flex API to each segment
- static class MultiTermDocs implements TermDocs {
- IndexReader topReader; // used for matching TermEnum to TermDocs
- protected IndexReader[] readers;
- protected int[] starts;
- protected Term term;
-
- protected int base = 0;
- protected int pointer = 0;
-
- private TermDocs[] readerTermDocs;
- protected TermDocs current; // == readerTermDocs[pointer]
-
- private MultiTermEnum tenum; // the term enum used for seeking... can be null
- int matchingSegmentPos; // position into the matching segments from tenum
- LegacySegmentMergeInfo smi; // current segment mere info... can be null
-
- public MultiTermDocs(IndexReader topReader, IndexReader[] r, int[] s) {
- this.topReader = topReader;
- readers = r;
- starts = s;
-
- readerTermDocs = new TermDocs[r.length];
- }
-
- public int doc() {
- return base + current.doc();
- }
- public int freq() {
- return current.freq();
- }
-
- public void seek(Term term) {
- this.term = term;
- this.base = 0;
- this.pointer = 0;
- this.current = null;
- this.tenum = null;
- this.smi = null;
- this.matchingSegmentPos = 0;
- }
-
- public void seek(TermEnum termEnum) throws IOException {
- seek(termEnum.term());
- if (termEnum instanceof MultiTermEnum) {
- tenum = (MultiTermEnum)termEnum;
- if (topReader != tenum.topReader)
- tenum = null;
- }
- }
-
- public boolean next() throws IOException {
- for(;;) {
- if (current!=null && current.next()) {
- return true;
- }
- else if (pointer < readers.length) {
- if (tenum != null) {
- smi = tenum.matchingSegments[matchingSegmentPos++];
- if (smi==null) {
- pointer = readers.length;
- return false;
- }
- pointer = smi.ord;
- }
- base = starts[pointer];
- current = termDocs(pointer++);
- } else {
- return false;
- }
- }
- }
-
- /** Optimized implementation. */
- public int read(final int[] docs, final int[] freqs) throws IOException {
- while (true) {
- while (current == null) {
- if (pointer < readers.length) { // try next segment
- if (tenum != null) {
- smi = tenum.matchingSegments[matchingSegmentPos++];
- if (smi==null) {
- pointer = readers.length;
- return 0;
- }
- pointer = smi.ord;
- }
- base = starts[pointer];
- current = termDocs(pointer++);
- } else {
- return 0;
- }
- }
- int end = current.read(docs, freqs);
- if (end == 0) { // none left in segment
- current = null;
- } else { // got some
- final int b = base; // adjust doc numbers
- for (int i = 0; i < end; i++)
- docs[i] += b;
- return end;
- }
- }
- }
-
- /* A Possible future optimization could skip entire segments */
- public boolean skipTo(int target) throws IOException {
- for(;;) {
- if (current != null && current.skipTo(target-base)) {
- return true;
- } else if (pointer < readers.length) {
- if (tenum != null) {
- LegacySegmentMergeInfo smi = tenum.matchingSegments[matchingSegmentPos++];
- if (smi==null) {
- pointer = readers.length;
- return false;
- }
- pointer = smi.ord;
- }
- base = starts[pointer];
- current = termDocs(pointer++);
- } else
- return false;
- }
- }
-
- private TermDocs termDocs(int i) throws IOException {
- TermDocs result = readerTermDocs[i];
- if (result == null)
- result = readerTermDocs[i] = termDocs(readers[i]);
- if (smi != null) {
- assert(smi.ord == i);
- assert(smi.termEnum.term().equals(term));
- result.seek(smi.termEnum);
- } else {
- result.seek(term);
- }
- return result;
- }
-
- protected TermDocs termDocs(IndexReader reader)
- throws IOException {
- return term==null ? reader.termDocs(null) : reader.termDocs();
- }
-
- public void close() throws IOException {
- for (int i = 0; i < readerTermDocs.length; i++) {
- if (readerTermDocs[i] != null)
- readerTermDocs[i].close();
- }
- }
- }
-
- // @deprecated This is pre-flex API
- // Exposes pre-flex API by doing on-the-fly merging
- // pre-flex API to each segment
- static class MultiTermPositions extends MultiTermDocs implements TermPositions {
- public MultiTermPositions(IndexReader topReader, IndexReader[] r, int[] s) {
- super(topReader,r,s);
- }
-
- @Override
- protected TermDocs termDocs(IndexReader reader) throws IOException {
- return reader.termPositions();
- }
-
- public int nextPosition() throws IOException {
- return ((TermPositions)current).nextPosition();
- }
-
- public int getPayloadLength() throws IOException {
- return ((TermPositions)current).getPayloadLength();
- }
-
- public byte[] getPayload(byte[] data, int offset) throws IOException {
- return ((TermPositions)current).getPayload(data, offset);
- }
-
-
- // TODO: Remove warning after API has been finalized
- public boolean isPayloadAvailable() {
- return ((TermPositions) current).isPayloadAvailable();
- }
- }
}
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java Thu Jul 22 19:34:35 2010
@@ -30,7 +30,8 @@ public abstract class DocsAndPositionsEn
public abstract int nextPosition() throws IOException;
/** Returns the payload at this position, or null if no
- * payload was indexed. */
+ * payload was indexed. Only call this once per
+ * position. */
public abstract BytesRef getPayload() throws IOException;
public abstract boolean hasPayload();
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Thu Jul 22 19:34:35 2010
@@ -18,7 +18,6 @@ import java.util.concurrent.locks.Reentr
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
-import org.apache.lucene.index.DocumentsWriterThreadPool.ThreadState;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java Thu Jul 22 19:34:35 2010
@@ -39,7 +39,10 @@ public final class FieldInfos {
// First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2;
- static final int CURRENT_FORMAT = FORMAT_START;
+ // whenever you add a new format, make it 1 smaller (negative version logic)!
+ static final int FORMAT_CURRENT = FORMAT_START;
+
+ static final int FORMAT_MINIMUM = FORMAT_START;
static final byte IS_INDEXED = 0x1;
static final byte STORE_TERMVECTOR = 0x2;
@@ -53,7 +56,7 @@ public final class FieldInfos {
private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
private int format;
- FieldInfos() { }
+ public FieldInfos() { }
/**
* Construct a FieldInfos object using the directory and the name of the file
@@ -62,7 +65,7 @@ public final class FieldInfos {
* @param name The name of the file to open the IndexInput from in the Directory
* @throws IOException
*/
- FieldInfos(Directory d, String name) throws IOException {
+ public FieldInfos(Directory d, String name) throws IOException {
IndexInput input = d.openInput(name);
try {
read(input, name);
@@ -286,7 +289,7 @@ public final class FieldInfos {
}
public void write(IndexOutput output) throws IOException {
- output.writeVInt(CURRENT_FORMAT);
+ output.writeVInt(FORMAT_CURRENT);
output.writeVInt(size());
for (int i = 0; i < size(); i++) {
FieldInfo fi = fieldInfo(i);
@@ -307,8 +310,11 @@ public final class FieldInfos {
private void read(IndexInput input, String fileName) throws IOException {
format = input.readVInt();
- if (format > FORMAT_START) {
- throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
+ if (format > FORMAT_MINIMUM) {
+ throw new IndexFormatTooOldException(fileName, format, FORMAT_MINIMUM, FORMAT_CURRENT);
+ }
+ if (format < FORMAT_CURRENT) {
+ throw new IndexFormatTooNewException(fileName, format, FORMAT_MINIMUM, FORMAT_CURRENT);
}
final int size = input.readVInt(); //read in the size
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java Thu Jul 22 19:34:35 2010
@@ -2,6 +2,8 @@ package org.apache.lucene.index;
import java.util.*;
+import org.apache.lucene.util.BytesRef;
+
/**
* Copyright 2007 The Apache Software Foundation
* <p/>
@@ -44,7 +46,7 @@ public class FieldSortedTermVectorMapper
}
@Override
- public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+ public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
TermVectorEntry entry = new TermVectorEntry(currentField, term, frequency, offsets, positions);
currentSet.add(entry);
}