You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/28 06:15:52 UTC
svn commit: r1075210 [2/3] - in /lucene/dev/trunk: lucene/
lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/
lucene/contrib/queries/src/java/org/apache/lucene/search/
lucene/contrib/queries/src/test/org/apache/lucene/search/ lucen...
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java Mon Feb 28 05:15:50 2011
@@ -39,7 +39,10 @@ import java.nio.ByteBuffer;
* <p/>
*
* @lucene.experimental
+ * @deprecated Implement {@link TermToBytesRefAttribute} and store bytes directly
+ * instead. This class will be removed in Lucene 5.0
*/
+@Deprecated
public final class IndexableBinaryStringTools {
private static final CodingCase[] CODING_CASES = {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java Mon Feb 28 05:15:50 2011
@@ -577,50 +577,6 @@ public class TestQueryParser extends Luc
assertQueryEquals("[\\* TO \"*\"]",null,"[\\* TO \\*]");
}
- public void testFarsiRangeCollating() throws Exception {
- Directory ramDir = newDirectory();
- IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
- Document doc = new Document();
- doc.add(newField("content","\u0633\u0627\u0628",
- Field.Store.YES, Field.Index.NOT_ANALYZED));
- iw.addDocument(doc);
- iw.close();
- IndexSearcher is = new IndexSearcher(ramDir, true);
-
- QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
-
- // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
- // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
- // characters properly.
- Collator c = Collator.getInstance(new Locale("ar"));
- qp.setRangeCollator(c);
-
- // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
- // orders the U+0698 character before the U+0633 character, so the single
- // index Term below should NOT be returned by a ConstantScoreRangeQuery
- // with a Farsi Collator (or an Arabic one for the case when Farsi is not
- // supported).
-
- // Test ConstantScoreRangeQuery
- qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
- ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- // Test TermRangeQuery
- qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
- result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- is.close();
- ramDir.close();
- }
-
private String escapeDateString(String s) {
if (s.indexOf(" ") > -1) {
return "\"" + s + "\"";
@@ -1260,4 +1216,41 @@ public class TestQueryParser extends Luc
Query unexpanded = new TermQuery(new Term("field", "dogs"));
assertEquals(unexpanded, smart.parse("\"dogs\""));
}
+
+ /**
+ * Mock collation analyzer: indexes terms as "collated" + term
+ */
+ private class MockCollationFilter extends TokenFilter {
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+ protected MockCollationFilter(TokenStream input) {
+ super(input);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ String term = termAtt.toString();
+ termAtt.setEmpty().append("collated").append(term);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ }
+ private class MockCollationAnalyzer extends Analyzer {
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new MockCollationFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
+ }
+ }
+
+ public void testCollatedRange() throws Exception {
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCollationAnalyzer());
+ qp.setAnalyzeRangeTerms(true);
+ Query expected = TermRangeQuery.newStringRange("field", "collatedabc", "collateddef", true, true);
+ Query actual = qp.parse("[abc TO def]");
+ assertEquals(expected, actual);
+ }
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java Mon Feb 28 05:15:50 2011
@@ -35,7 +35,7 @@ public class TestConstantScoreQuery exte
public void testCSQ() throws Exception {
final Query q1 = new ConstantScoreQuery(new TermQuery(new Term("a", "b")));
final Query q2 = new ConstantScoreQuery(new TermQuery(new Term("a", "c")));
- final Query q3 = new ConstantScoreQuery(new TermRangeFilter("a", "b", "c", true, true));
+ final Query q3 = new ConstantScoreQuery(TermRangeFilter.newStringRange("a", "b", "c", true, true));
QueryUtils.check(q1);
QueryUtils.check(q2);
QueryUtils.checkEqual(q1,q1);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDateFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDateFilter.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDateFilter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDateFilter.java Mon Feb 28 05:15:50 2011
@@ -61,12 +61,12 @@ public class TestDateFilter extends Luce
// filter that should preserve matches
// DateFilter df1 = DateFilter.Before("datefield", now);
- TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools
+ TermRangeFilter df1 = TermRangeFilter.newStringRange("datefield", DateTools
.timeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools
.timeToString(now, DateTools.Resolution.MILLISECOND), false, true);
// filter that should discard matches
// DateFilter df2 = DateFilter.Before("datefield", now - 999999);
- TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools
+ TermRangeFilter df2 = TermRangeFilter.newStringRange("datefield", DateTools
.timeToString(0, DateTools.Resolution.MILLISECOND), DateTools
.timeToString(now - 2000, DateTools.Resolution.MILLISECOND), true,
false);
@@ -128,13 +128,13 @@ public class TestDateFilter extends Luce
// filter that should preserve matches
// DateFilter df1 = DateFilter.After("datefield", now);
- TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools
+ TermRangeFilter df1 = TermRangeFilter.newStringRange("datefield", DateTools
.timeToString(now, DateTools.Resolution.MILLISECOND), DateTools
.timeToString(now + 999999, DateTools.Resolution.MILLISECOND), true,
false);
// filter that should discard matches
// DateFilter df2 = DateFilter.After("datefield", now + 999999);
- TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools
+ TermRangeFilter df2 = TermRangeFilter.newStringRange("datefield", DateTools
.timeToString(now + 999999, DateTools.Resolution.MILLISECOND),
DateTools.timeToString(now + 999999999,
DateTools.Resolution.MILLISECOND), false, true);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java Mon Feb 28 05:15:50 2011
@@ -187,7 +187,7 @@ public class TestFilteredQuery extends L
* This tests FilteredQuery's rewrite correctness
*/
public void testRangeQuery() throws Exception {
- TermRangeQuery rq = new TermRangeQuery(
+ TermRangeQuery rq = TermRangeQuery.newStringRange(
"sorter", "b", "d", true, true);
Query filteredquery = new FilteredQuery(rq, filter);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java Mon Feb 28 05:15:50 2011
@@ -92,26 +92,18 @@ public class TestMultiTermConstantScore
/** macro for readability */
public static Query csrq(String f, String l, String h, boolean il, boolean ih) {
- TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
+ TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih);
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) {
- TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
+ TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih);
query.setRewriteMethod(method);
return query;
}
/** macro for readability */
- public static Query csrq(String f, String l, String h, boolean il,
- boolean ih, Collator c) {
- TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c);
- query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
- return query;
- }
-
- /** macro for readability */
public static Query cspq(Term prefix) {
PrefixQuery query = new PrefixQuery(prefix);
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
@@ -142,15 +134,6 @@ public class TestMultiTermConstantScore
}
@Test
- public void testBasicsRngCollating() throws IOException {
- Collator c = Collator.getInstance(Locale.ENGLISH);
- QueryUtils.check(csrq("data", "1", "6", T, T, c));
- QueryUtils.check(csrq("data", "A", "Z", T, T, c));
- QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", "A",
- "Z", T, T, c));
- }
-
- @Test
public void testEqualScores() throws IOException {
// NOTE: uses index build in *this* setUp
@@ -262,7 +245,7 @@ public class TestMultiTermConstantScore
// first do a regular TermRangeQuery which uses term expansion so
// docs with more terms in range get higher scores
- Query rq = new TermRangeQuery("data", "1", "4", T, T);
+ Query rq = TermRangeQuery.newStringRange("data", "1", "4", T, T);
ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs;
int numHits = expected.length;
@@ -416,92 +399,6 @@ public class TestMultiTermConstantScore
}
@Test
- public void testRangeQueryIdCollating() throws IOException {
- // NOTE: uses index build in *super* setUp
-
- IndexReader reader = signedIndexReader;
- IndexSearcher search = newSearcher(reader);
-
- int medId = ((maxId - minId) / 2);
-
- String minIP = pad(minId);
- String maxIP = pad(maxId);
- String medIP = pad(medId);
-
- int numDocs = reader.numDocs();
-
- assertEquals("num of docs", numDocs, 1 + maxId - minId);
-
- ScoreDoc[] result;
-
- Collator c = Collator.getInstance(Locale.ENGLISH);
-
- // test id, bounded on both ends
-
- result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("find all", numDocs, result.length);
-
- result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs;
- assertEquals("all but last", numDocs - 1, result.length);
-
- result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("all but first", numDocs - 1, result.length);
-
- result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("all but ends", numDocs - 2, result.length);
-
- result = search.search(csrq("id", medIP, maxIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("med and up", 1 + maxId - medId, result.length);
-
- result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("up to med", 1 + medId - minId, result.length);
-
- // unbounded id
-
- result = search.search(csrq("id", minIP, null, T, F, c), null, numDocs).scoreDocs;
- assertEquals("min and up", numDocs, result.length);
-
- result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("max and down", numDocs, result.length);
-
- result = search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs;
- assertEquals("not min, but up", numDocs - 1, result.length);
-
- result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("not max, but down", numDocs - 1, result.length);
-
- result = search.search(csrq("id", medIP, maxIP, T, F, c), null, numDocs).scoreDocs;
- assertEquals("med and up, not max", maxId - medId, result.length);
-
- result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("not min, up to med", medId - minId, result.length);
-
- // very small sets
-
- result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("min,min,F,F,c", 0, result.length);
- result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("med,med,F,F,c", 0, result.length);
- result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("max,max,F,F,c", 0, result.length);
-
- result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("min,min,T,T,c", 1, result.length);
- result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("nul,min,F,T,c", 1, result.length);
-
- result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("max,max,T,T,c", 1, result.length);
- result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs;
- assertEquals("max,nul,T,T,c", 1, result.length);
-
- result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("med,med,T,T,c", 1, result.length);
-
- search.close();
- }
-
- @Test
public void testRangeQueryRand() throws IOException {
// NOTE: uses index build in *super* setUp
@@ -564,151 +461,4 @@ public class TestMultiTermConstantScore
search.close();
}
-
- @Test
- public void testRangeQueryRandCollating() throws IOException {
- // NOTE: uses index build in *super* setUp
-
- // using the unsigned index because collation seems to ignore hyphens
- IndexReader reader = unsignedIndexReader;
- IndexSearcher search = newSearcher(reader);
-
- String minRP = pad(unsignedIndexDir.minR);
- String maxRP = pad(unsignedIndexDir.maxR);
-
- int numDocs = reader.numDocs();
-
- assertEquals("num of docs", numDocs, 1 + maxId - minId);
-
- ScoreDoc[] result;
-
- Collator c = Collator.getInstance(Locale.ENGLISH);
-
- // test extremes, bounded on both ends
-
- result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("find all", numDocs, result.length);
-
- result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs;
- assertEquals("all but biggest", numDocs - 1, result.length);
-
- result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("all but smallest", numDocs - 1, result.length);
-
- result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("all but extremes", numDocs - 2, result.length);
-
- // unbounded
-
- result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs;
- assertEquals("smallest and up", numDocs, result.length);
-
- result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("biggest and down", numDocs, result.length);
-
- result = search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs;
- assertEquals("not smallest, but up", numDocs - 1, result.length);
-
- result = search.search(csrq("rand", null, maxRP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("not biggest, but down", numDocs - 1, result.length);
-
- // very small sets
-
- result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("min,min,F,F,c", 0, result.length);
- result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("max,max,F,F,c", 0, result.length);
-
- result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("min,min,T,T,c", 1, result.length);
- result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("nul,min,F,T,c", 1, result.length);
-
- result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("max,max,T,T,c", 1, result.length);
- result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs;
- assertEquals("max,nul,T,T,c", 1, result.length);
-
- search.close();
- }
-
- @Test
- public void testFarsi() throws Exception {
-
- /* build an index */
- Directory farsiIndex = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));
- Document doc = new Document();
- doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- doc
- .add(newField("body", "body", Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- writer.addDocument(doc);
-
- IndexReader reader = writer.getReader();
- writer.close();
-
- IndexSearcher search = newSearcher(reader);
-
- // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
- // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
- // characters properly.
- Collator c = Collator.getInstance(new Locale("ar"));
-
- // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
- // orders the U+0698 character before the U+0633 character, so the single
- // index Term below should NOT be returned by a ConstantScoreRangeQuery
- // with a Farsi Collator (or an Arabic one for the case when Farsi is
- // not supported).
- ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T,
- c), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null,
- 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
- search.close();
- reader.close();
- farsiIndex.close();
- }
-
- @Test
- public void testDanish() throws Exception {
-
- /* build an index */
- Directory danishIndex = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));
-
- // Danish collation orders the words below in the given order
- // (example taken from TestSort.testInternationalSort() ).
- String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
- for (int docnum = 0 ; docnum < words.length ; ++docnum) {
- Document doc = new Document();
- doc.add(newField("content", words[docnum],
- Field.Store.YES, Field.Index.NOT_ANALYZED));
- doc.add(newField("body", "body",
- Field.Store.YES, Field.Index.NOT_ANALYZED));
- writer.addDocument(doc);
- }
- IndexReader reader = writer.getReader();
- writer.close();
-
- IndexSearcher search = newSearcher(reader);
-
- Collator c = Collator.getInstance(new Locale("da", "dk"));
-
- // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
- // but Danish collation does.
- ScoreDoc[] result = search.search
- (csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- result = search.search
- (csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
- search.close();
- reader.close();
- danishIndex.close();
- }
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java Mon Feb 28 05:15:50 2011
@@ -112,7 +112,7 @@ public class TestMultiTermQueryRewrites
}
private void checkDuplicateTerms(MultiTermQuery.RewriteMethod method) throws Exception {
- final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true);
+ final MultiTermQuery mtq = TermRangeQuery.newStringRange("data", "2", "7", true, true);
mtq.setRewriteMethod(method);
final Query q1 = searcher.rewrite(mtq);
final Query q2 = multiSearcher.rewrite(mtq);
@@ -158,7 +158,7 @@ public class TestMultiTermQueryRewrites
final MultiTermQuery mtq = new MultiTermQuery("data") {
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- return new TermRangeTermsEnum(terms.iterator(), "2", "7", true, true, null) {
+ return new TermRangeTermsEnum(terms.iterator(), new BytesRef("2"), new BytesRef("7"), true, true) {
final BoostAttribute boostAtt =
attributes().addAttribute(BoostAttribute.class);
@@ -203,7 +203,7 @@ public class TestMultiTermQueryRewrites
// default gets restored automatically by LuceneTestCase:
BooleanQuery.setMaxClauseCount(3);
- final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true);
+ final MultiTermQuery mtq = TermRangeQuery.newStringRange("data", "2", "7", true, true);
mtq.setRewriteMethod(method);
try {
multiSearcherDupls.rewrite(mtq);
@@ -219,7 +219,7 @@ public class TestMultiTermQueryRewrites
// default gets restored automatically by LuceneTestCase:
BooleanQuery.setMaxClauseCount(3);
- final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true);
+ final MultiTermQuery mtq = TermRangeQuery.newStringRange("data", "2", "7", true, true);
mtq.setRewriteMethod(method);
multiSearcherDupls.rewrite(mtq);
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java Mon Feb 28 05:15:50 2011
@@ -67,7 +67,7 @@ public class TestMultiValuedNumericRange
if (lower>upper) {
int a=lower; lower=upper; upper=a;
}
- TermRangeQuery cq=new TermRangeQuery("asc", format.format(lower), format.format(upper), true, true);
+ TermRangeQuery cq=TermRangeQuery.newStringRange("asc", format.format(lower), format.format(upper), true, true);
NumericRangeQuery<Integer> tq=NumericRangeQuery.newIntRange("trie", lower, upper, true, true);
TopDocs trTopDocs = searcher.search(cq, 1);
TopDocs nrTopDocs = searcher.search(tq, 1);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java Mon Feb 28 05:15:50 2011
@@ -344,12 +344,10 @@ public class TestNumericRangeQuery32 ext
final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_INT), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_INT);
NumericUtils.intToPrefixCoded(lower, 0, lowerBytes);
NumericUtils.intToPrefixCoded(upper, 0, upperBytes);
- // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string!
- final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString();
// test inclusive range
NumericRangeQuery<Integer> tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
- TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true);
+ TermRangeQuery cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, true);
TopDocs tTopDocs = searcher.search(tq, 1);
TopDocs cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -357,7 +355,7 @@ public class TestNumericRangeQuery32 ext
termCountC += cq.getTotalNumberOfTerms();
// test exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false);
- cq=new TermRangeQuery(field, lowerString, upperString, false, false);
+ cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, false);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -365,7 +363,7 @@ public class TestNumericRangeQuery32 ext
termCountC += cq.getTotalNumberOfTerms();
// test left exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true);
- cq=new TermRangeQuery(field, lowerString, upperString, false, true);
+ cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, true);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -373,7 +371,7 @@ public class TestNumericRangeQuery32 ext
termCountC += cq.getTotalNumberOfTerms();
// test right exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false);
- cq=new TermRangeQuery(field, lowerString, upperString, true, false);
+ cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, false);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java Mon Feb 28 05:15:50 2011
@@ -361,12 +361,10 @@ public class TestNumericRangeQuery64 ext
final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
NumericUtils.longToPrefixCoded(lower, 0, lowerBytes);
NumericUtils.longToPrefixCoded(upper, 0, upperBytes);
- // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string!
- final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString();
// test inclusive range
NumericRangeQuery<Long> tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
- TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true);
+ TermRangeQuery cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, true);
TopDocs tTopDocs = searcher.search(tq, 1);
TopDocs cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -374,7 +372,7 @@ public class TestNumericRangeQuery64 ext
termCountC += cq.getTotalNumberOfTerms();
// test exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false);
- cq=new TermRangeQuery(field, lowerString, upperString, false, false);
+ cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, false);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -382,7 +380,7 @@ public class TestNumericRangeQuery64 ext
termCountC += cq.getTotalNumberOfTerms();
// test left exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true);
- cq=new TermRangeQuery(field, lowerString, upperString, false, true);
+ cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, true);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -390,7 +388,7 @@ public class TestNumericRangeQuery64 ext
termCountC += cq.getTotalNumberOfTerms();
// test right exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false);
- cq=new TermRangeQuery(field, lowerString, upperString, true, false);
+ cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, false);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSort.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSort.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSort.java Mon Feb 28 05:15:50 2011
@@ -18,12 +18,8 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.text.Collator;
import java.util.ArrayList;
import java.util.BitSet;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
@@ -110,11 +106,6 @@ public class TestSort extends LuceneTest
{ "d", "m", null, null, null, null, null, null, null, null, null, null}
};
- // the sort order of à versus U depends on the version of the rules being used
- // for the inherited root locale: Ã's order isnt specified in Locale.US since
- // its not used in english.
- private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ã", "U") < 0;
-
// create an index of all the documents, or just the x, or just the y documents
private IndexSearcher getIndex (boolean even, boolean odd)
throws IOException {
@@ -564,12 +555,6 @@ public class TestSort extends LuceneTest
sort.setSort (new SortField ("string", SortField.STRING, true) );
assertMatches (full, queryF, sort, "IJZ");
- sort.setSort (new SortField ("i18n", Locale.ENGLISH));
- assertMatches (full, queryF, sort, "ZJI");
-
- sort.setSort (new SortField ("i18n", Locale.ENGLISH, true));
- assertMatches (full, queryF, sort, "IJZ");
-
sort.setSort (new SortField ("int", SortField.INT) );
assertMatches (full, queryF, sort, "IZJ");
@@ -630,36 +615,6 @@ public class TestSort extends LuceneTest
assertMatches (full, queryX, sort, "GICEA");
}
- // test using a Locale for sorting strings
- public void testLocaleSort() throws Exception {
- sort.setSort (new SortField ("string", Locale.US) );
- assertMatches (full, queryX, sort, "AIGEC");
- assertMatches (full, queryY, sort, "DJHFB");
-
- sort.setSort (new SortField ("string", Locale.US, true) );
- assertMatches (full, queryX, sort, "CEGIA");
- assertMatches (full, queryY, sort, "BFHJD");
- }
-
- // test using various international locales with accented characters
- // (which sort differently depending on locale)
- public void testInternationalSort() throws Exception {
- sort.setSort (new SortField ("i18n", Locale.US));
- assertMatches (full, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH");
-
- sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
- assertMatches (full, queryY, sort, "BJDFH");
-
- sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
- assertMatches (full, queryY, sort, "BJDHF");
-
- sort.setSort (new SortField ("i18n", Locale.US));
- assertMatches (full, queryX, sort, "ECAGI");
-
- sort.setSort (new SortField ("i18n", Locale.FRANCE));
- assertMatches (full, queryX, sort, "EACGI");
- }
-
// test a variety of sorts using a parallel multisearcher
public void testParallelMultiSort() throws Exception {
ExecutorService exec = Executors.newFixedThreadPool(_TestUtil.nextInt(random, 2, 8));
@@ -976,19 +931,6 @@ public class TestSort extends LuceneTest
assertSaneFieldCaches(getName() + " various");
// next we'll check Locale based (String[]) for 'string', so purge first
FieldCache.DEFAULT.purgeAllCaches();
-
- sort.setSort(new SortField ("string", Locale.US) );
- assertMatches(multi, queryA, sort, "DJAIHGFEBC");
-
- sort.setSort(new SortField ("string", Locale.US, true) );
- assertMatches(multi, queryA, sort, "CBEFGHIAJD");
-
- sort.setSort(new SortField ("string", Locale.UK) );
- assertMatches(multi, queryA, sort, "DJAIHGFEBC");
-
- assertSaneFieldCaches(getName() + " Locale.US + Locale.UK");
- FieldCache.DEFAULT.purgeAllCaches();
-
}
private void assertMatches(IndexSearcher searcher, Query query, Sort sort, String expectedResult) throws IOException {
@@ -1014,37 +956,6 @@ public class TestSort extends LuceneTest
assertEquals (msg, expectedResult, buff.toString());
}
- private HashMap<String,Float> getScores (ScoreDoc[] hits, IndexSearcher searcher)
- throws IOException {
- HashMap<String,Float> scoreMap = new HashMap<String,Float>();
- int n = hits.length;
- for (int i=0; i<n; ++i) {
- Document doc = searcher.doc(hits[i].doc);
- String[] v = doc.getValues("tracer");
- assertEquals (v.length, 1);
- scoreMap.put (v[0], Float.valueOf(hits[i].score));
- }
- return scoreMap;
- }
-
- // make sure all the values in the maps match
- private <K, V> void assertSameValues (HashMap<K,V> m1, HashMap<K,V> m2) {
- int n = m1.size();
- int m = m2.size();
- assertEquals (n, m);
- Iterator<K> iter = m1.keySet().iterator();
- while (iter.hasNext()) {
- K key = iter.next();
- V o1 = m1.get(key);
- V o2 = m2.get(key);
- if (o1 instanceof Float) {
- assertEquals(((Float)o1).floatValue(), ((Float)o2).floatValue(), 1e-6);
- } else {
- assertEquals (m1.get(key), m2.get(key));
- }
- }
- }
-
public void testEmptyStringVsNullStringSort() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java Mon Feb 28 05:15:50 2011
@@ -18,15 +18,9 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.text.Collator;
-import java.util.Locale;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.store.Directory;
import org.junit.Test;
/**
@@ -61,83 +55,83 @@ public class TestTermRangeFilter extends
// test id, bounded on both ends
- result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, T),
numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
- result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, F),
numDocs).scoreDocs;
assertEquals("all but last", numDocs - 1, result.length);
- result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, T),
numDocs).scoreDocs;
assertEquals("all but first", numDocs - 1, result.length);
- result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, F),
numDocs).scoreDocs;
assertEquals("all but ends", numDocs - 2, result.length);
- result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, T),
numDocs).scoreDocs;
assertEquals("med and up", 1 + maxId - medId, result.length);
- result = search.search(q, new TermRangeFilter("id", minIP, medIP, T, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, T, T),
numDocs).scoreDocs;
assertEquals("up to med", 1 + medId - minId, result.length);
// unbounded id
- result = search.search(q, new TermRangeFilter("id", minIP, null, T, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, null, T, F),
numDocs).scoreDocs;
assertEquals("min and up", numDocs, result.length);
- result = search.search(q, new TermRangeFilter("id", null, maxIP, F, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, T),
numDocs).scoreDocs;
assertEquals("max and down", numDocs, result.length);
- result = search.search(q, new TermRangeFilter("id", minIP, null, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, null, F, F),
numDocs).scoreDocs;
assertEquals("not min, but up", numDocs - 1, result.length);
- result = search.search(q, new TermRangeFilter("id", null, maxIP, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, F),
numDocs).scoreDocs;
assertEquals("not max, but down", numDocs - 1, result.length);
- result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, F),
numDocs).scoreDocs;
assertEquals("med and up, not max", maxId - medId, result.length);
- result = search.search(q, new TermRangeFilter("id", minIP, medIP, F, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, F, T),
numDocs).scoreDocs;
assertEquals("not min, up to med", medId - minId, result.length);
// very small sets
- result = search.search(q, new TermRangeFilter("id", minIP, minIP, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, F, F),
numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
- result = search.search(q, new TermRangeFilter("id", medIP, medIP, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, F, F),
numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
- result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, F, F),
numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
- result = search.search(q, new TermRangeFilter("id", minIP, minIP, T, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, T, T),
numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
- result = search.search(q, new TermRangeFilter("id", null, minIP, F, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", null, minIP, F, T),
numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
- result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, T, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, T, T),
numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
- result = search.search(q, new TermRangeFilter("id", maxIP, null, T, F),
+ result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, null, T, F),
numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
- result = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T),
+ result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, T, T),
numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
@@ -145,110 +139,6 @@ public class TestTermRangeFilter extends
}
@Test
- public void testRangeFilterIdCollating() throws IOException {
-
- IndexReader reader = signedIndexReader;
- IndexSearcher search = newSearcher(reader);
-
- Collator c = Collator.getInstance(Locale.ENGLISH);
-
- int medId = ((maxId - minId) / 2);
-
- String minIP = pad(minId);
- String maxIP = pad(maxId);
- String medIP = pad(medId);
-
- int numDocs = reader.numDocs();
-
- assertEquals("num of docs", numDocs, 1 + maxId - minId);
-
- Query q = new TermQuery(new Term("body", "body"));
-
- // test id, bounded on both ends
- int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T,
- T, c), 1000).totalHits;
- assertEquals("find all", numDocs, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits;
- assertEquals("all but last", numDocs - 1, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits;
- assertEquals("all but first", numDocs - 1, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits;
- assertEquals("all but ends", numDocs - 2, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits;
- assertEquals("med and up", 1 + maxId - medId, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits;
- assertEquals("up to med", 1 + medId - minId, numHits);
-
- // unbounded id
-
- numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c),
- 1000).totalHits;
- assertEquals("min and up", numDocs, numHits);
-
- numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, c),
- 1000).totalHits;
- assertEquals("max and down", numDocs, numHits);
-
- numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c),
- 1000).totalHits;
- assertEquals("not min, but up", numDocs - 1, numHits);
-
- numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c),
- 1000).totalHits;
- assertEquals("not max, but down", numDocs - 1, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits;
- assertEquals("med and up, not max", maxId - medId, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits;
- assertEquals("not min, up to med", medId - minId, numHits);
-
- // very small sets
-
- numHits = search.search(q,
- new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits;
- assertEquals("min,min,F,F", 0, numHits);
- numHits = search.search(q,
- new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits;
- assertEquals("med,med,F,F", 0, numHits);
- numHits = search.search(q,
- new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits;
- assertEquals("max,max,F,F", 0, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits;
- assertEquals("min,min,T,T", 1, numHits);
- numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c),
- 1000).totalHits;
- assertEquals("nul,min,F,T", 1, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits;
- assertEquals("max,max,T,T", 1, numHits);
- numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c),
- 1000).totalHits;
- assertEquals("max,nul,T,T", 1, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits;
- assertEquals("med,med,T,T", 1, numHits);
-
- search.close();
- }
-
- @Test
public void testRangeFilterRand() throws IOException {
IndexReader reader = signedIndexReader;
@@ -266,223 +156,63 @@ public class TestTermRangeFilter extends
// test extremes, bounded on both ends
- result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, T),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, T),
numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
- result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, F),
numDocs).scoreDocs;
assertEquals("all but biggest", numDocs - 1, result.length);
- result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, T),
numDocs).scoreDocs;
assertEquals("all but smallest", numDocs - 1, result.length);
- result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, F),
numDocs).scoreDocs;
assertEquals("all but extremes", numDocs - 2, result.length);
// unbounded
- result = search.search(q, new TermRangeFilter("rand", minRP, null, T, F),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, T, F),
numDocs).scoreDocs;
assertEquals("smallest and up", numDocs, result.length);
- result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, T),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, T),
numDocs).scoreDocs;
assertEquals("biggest and down", numDocs, result.length);
- result = search.search(q, new TermRangeFilter("rand", minRP, null, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, F, F),
numDocs).scoreDocs;
assertEquals("not smallest, but up", numDocs - 1, result.length);
- result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, F),
numDocs).scoreDocs;
assertEquals("not biggest, but down", numDocs - 1, result.length);
// very small sets
- result = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, F, F),
numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
- result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, F, F),
numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
- result = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, T, T),
numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
- result = search.search(q, new TermRangeFilter("rand", null, minRP, F, T),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", null, minRP, F, T),
numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
- result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, T, T),
numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
- result = search.search(q, new TermRangeFilter("rand", maxRP, null, T, F),
+ result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, null, T, F),
numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
search.close();
}
-
- @Test
- public void testRangeFilterRandCollating() throws IOException {
-
- // using the unsigned index because collation seems to ignore hyphens
- IndexReader reader = unsignedIndexReader;
- IndexSearcher search = newSearcher(reader);
-
- Collator c = Collator.getInstance(Locale.ENGLISH);
-
- String minRP = pad(unsignedIndexDir.minR);
- String maxRP = pad(unsignedIndexDir.maxR);
-
- int numDocs = reader.numDocs();
-
- assertEquals("num of docs", numDocs, 1 + maxId - minId);
-
- Query q = new TermQuery(new Term("body", "body"));
-
- // test extremes, bounded on both ends
-
- int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T,
- T, c), 1000).totalHits;
- assertEquals("find all", numDocs, numHits);
-
- numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F,
- c), 1000).totalHits;
- assertEquals("all but biggest", numDocs - 1, numHits);
-
- numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T,
- c), 1000).totalHits;
- assertEquals("all but smallest", numDocs - 1, numHits);
-
- numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F,
- c), 1000).totalHits;
- assertEquals("all but extremes", numDocs - 2, numHits);
-
- // unbounded
-
- numHits = search.search(q,
- new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits;
- assertEquals("smallest and up", numDocs, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits;
- assertEquals("biggest and down", numDocs, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits;
- assertEquals("not smallest, but up", numDocs - 1, numHits);
-
- numHits = search.search(q,
- new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits;
- assertEquals("not biggest, but down", numDocs - 1, numHits);
-
- // very small sets
-
- numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F,
- c), 1000).totalHits;
- assertEquals("min,min,F,F", 0, numHits);
- numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F,
- c), 1000).totalHits;
- assertEquals("max,max,F,F", 0, numHits);
-
- numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T,
- c), 1000).totalHits;
- assertEquals("min,min,T,T", 1, numHits);
- numHits = search.search(q,
- new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits;
- assertEquals("nul,min,F,T", 1, numHits);
-
- numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T,
- c), 1000).totalHits;
- assertEquals("max,max,T,T", 1, numHits);
- numHits = search.search(q,
- new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits;
- assertEquals("max,nul,T,T", 1, numHits);
-
- search.close();
- }
-
- @Test
- public void testFarsi() throws Exception {
-
- /* build an index */
- Directory farsiIndex = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex);
- Document doc = new Document();
- doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- doc
- .add(newField("body", "body", Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- writer.addDocument(doc);
-
- IndexReader reader = writer.getReader();
- writer.close();
-
- IndexSearcher search = newSearcher(reader);
- Query q = new TermQuery(new Term("body", "body"));
-
- // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
- // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
- // characters properly.
- Collator collator = Collator.getInstance(new Locale("ar"));
-
- // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
- // orders the U+0698 character before the U+0633 character, so the single
- // index Term below should NOT be returned by a TermRangeFilter with a Farsi
- // Collator (or an Arabic one for the case when Farsi is not supported).
- int numHits = search.search(q, new TermRangeFilter("content", "\u062F",
- "\u0698", T, T, collator), 1000).totalHits;
- assertEquals("The index Term should not be included.", 0, numHits);
-
- numHits = search.search(q, new TermRangeFilter("content", "\u0633",
- "\u0638", T, T, collator), 1000).totalHits;
- assertEquals("The index Term should be included.", 1, numHits);
- search.close();
- reader.close();
- farsiIndex.close();
- }
-
- @Test
- public void testDanish() throws Exception {
-
- /* build an index */
- Directory danishIndex = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex);
- // Danish collation orders the words below in the given order
- // (example taken from TestSort.testInternationalSort() ).
- String[] words = {"H\u00D8T", "H\u00C5T", "MAND"};
- for (int docnum = 0; docnum < words.length; ++docnum) {
- Document doc = new Document();
- doc.add(newField("content", words[docnum], Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- doc.add(newField("body", "body", Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- writer.addDocument(doc);
- }
- IndexReader reader = writer.getReader();
- writer.close();
-
- IndexSearcher search = newSearcher(reader);
- Query q = new TermQuery(new Term("body", "body"));
-
- Collator collator = Collator.getInstance(new Locale("da", "dk"));
-
- // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
- // but Danish collation does.
- int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T",
- "MAND", F, F, collator), 1000).totalHits;
- assertEquals("The index Term should be included.", 1, numHits);
-
- numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T",
- "MAND", F, F, collator), 1000).totalHits;
- assertEquals("The index Term should not be included.", 0, numHits);
- search.close();
- reader.close();
- danishIndex.close();
- }
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java Mon Feb 28 05:15:50 2011
@@ -53,7 +53,7 @@ public class TestTermRangeQuery extends
}
public void testExclusive() throws Exception {
- Query query = new TermRangeQuery("content", "A", "C", false, false);
+ Query query = TermRangeQuery.newStringRange("content", "A", "C", false, false);
initializeIndex(new String[] {"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir, true);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
@@ -74,7 +74,7 @@ public class TestTermRangeQuery extends
}
public void testInclusive() throws Exception {
- Query query = new TermRangeQuery("content", "A", "C", true, true);
+ Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true);
initializeIndex(new String[]{"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir, true);
@@ -105,11 +105,11 @@ public class TestTermRangeQuery extends
query = new TermRangeQuery("content", null, null, false, false);
assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
- query = new TermRangeQuery("content", "", null, true, false);
+ query = TermRangeQuery.newStringRange("content", "", null, true, false);
assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
// and now anothe one
- query = new TermRangeQuery("content", "B", null, true, false);
+ query = TermRangeQuery.newStringRange("content", "B", null, true, false);
assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length);
searcher.close();
@@ -121,7 +121,7 @@ public class TestTermRangeQuery extends
initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});
IndexSearcher searcher = new IndexSearcher(dir, true);
- TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
+ TermRangeQuery query = TermRangeQuery.newStringRange("content", "B", "J", true, true);
checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");
final int savedClauseCount = BooleanQuery.getMaxClauseCount();
@@ -150,10 +150,10 @@ public class TestTermRangeQuery extends
}
public void testEqualsHashcode() {
- Query query = new TermRangeQuery("content", "A", "C", true, true);
+ Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true);
query.setBoost(1.0f);
- Query other = new TermRangeQuery("content", "A", "C", true, true);
+ Query other = TermRangeQuery.newStringRange("content", "A", "C", true, true);
other.setBoost(1.0f);
assertEquals("query equals itself is true", query, query);
@@ -163,120 +163,32 @@ public class TestTermRangeQuery extends
other.setBoost(2.0f);
assertFalse("Different boost queries are not equal", query.equals(other));
- other = new TermRangeQuery("notcontent", "A", "C", true, true);
+ other = TermRangeQuery.newStringRange("notcontent", "A", "C", true, true);
assertFalse("Different fields are not equal", query.equals(other));
- other = new TermRangeQuery("content", "X", "C", true, true);
+ other = TermRangeQuery.newStringRange("content", "X", "C", true, true);
assertFalse("Different lower terms are not equal", query.equals(other));
- other = new TermRangeQuery("content", "A", "Z", true, true);
+ other = TermRangeQuery.newStringRange("content", "A", "Z", true, true);
assertFalse("Different upper terms are not equal", query.equals(other));
- query = new TermRangeQuery("content", null, "C", true, true);
- other = new TermRangeQuery("content", null, "C", true, true);
+ query = TermRangeQuery.newStringRange("content", null, "C", true, true);
+ other = TermRangeQuery.newStringRange("content", null, "C", true, true);
assertEquals("equivalent queries with null lowerterms are equal()", query, other);
assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
- query = new TermRangeQuery("content", "C", null, true, true);
- other = new TermRangeQuery("content", "C", null, true, true);
+ query = TermRangeQuery.newStringRange("content", "C", null, true, true);
+ other = TermRangeQuery.newStringRange("content", "C", null, true, true);
assertEquals("equivalent queries with null upperterms are equal()", query, other);
assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
- query = new TermRangeQuery("content", null, "C", true, true);
- other = new TermRangeQuery("content", "C", null, true, true);
+ query = TermRangeQuery.newStringRange("content", null, "C", true, true);
+ other = TermRangeQuery.newStringRange("content", "C", null, true, true);
assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
- query = new TermRangeQuery("content", "A", "C", false, false);
- other = new TermRangeQuery("content", "A", "C", true, true);
+ query = TermRangeQuery.newStringRange("content", "A", "C", false, false);
+ other = TermRangeQuery.newStringRange("content", "A", "C", true, true);
assertFalse("queries with different inclusive are not equal", query.equals(other));
-
- query = new TermRangeQuery("content", "A", "C", false, false);
- other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
- assertFalse("a query with a collator is not equal to one without", query.equals(other));
- }
-
- public void testExclusiveCollating() throws Exception {
- Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
- initializeIndex(new String[] {"A", "B", "C", "D"});
- IndexSearcher searcher = new IndexSearcher(dir, true);
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("A,B,C,D, only B in range", 1, hits.length);
- searcher.close();
-
- initializeIndex(new String[] {"A", "B", "D"});
- searcher = new IndexSearcher(dir, true);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("A,B,D, only B in range", 1, hits.length);
- searcher.close();
-
- addDoc("C");
- searcher = new IndexSearcher(dir, true);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("C added, still only B in range", 1, hits.length);
- searcher.close();
- }
-
- public void testInclusiveCollating() throws Exception {
- Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));
-
- initializeIndex(new String[]{"A", "B", "C", "D"});
- IndexSearcher searcher = new IndexSearcher(dir, true);
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
- searcher.close();
-
- initializeIndex(new String[]{"A", "B", "D"});
- searcher = new IndexSearcher(dir, true);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("A,B,D - A and B in range", 2, hits.length);
- searcher.close();
-
- addDoc("C");
- searcher = new IndexSearcher(dir, true);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("C added - A, B, C in range", 3, hits.length);
- searcher.close();
- }
-
- public void testFarsi() throws Exception {
- // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
- // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
- // characters properly.
- Collator collator = Collator.getInstance(new Locale("ar"));
- Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);
- // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
- // orders the U+0698 character before the U+0633 character, so the single
- // index Term below should NOT be returned by a TermRangeQuery with a Farsi
- // Collator (or an Arabic one for the case when Farsi is not supported).
- initializeIndex(new String[]{ "\u0633\u0627\u0628"});
- IndexSearcher searcher = new IndexSearcher(dir, true);
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, hits.length);
-
- query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, hits.length);
- searcher.close();
- }
-
- public void testDanish() throws Exception {
- Collator collator = Collator.getInstance(new Locale("da", "dk"));
- // Danish collation orders the words below in the given order (example taken
- // from TestSort.testInternationalSort() ).
- String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
- Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
-
- // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
- // but Danish collation does.
- initializeIndex(words);
- IndexSearcher searcher = new IndexSearcher(dir, true);
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, hits.length);
-
- query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, hits.length);
- searcher.close();
}
private static class SingleCharAnalyzer extends Analyzer {
@@ -363,7 +275,7 @@ public class TestTermRangeQuery extends
public void testExclusiveLowerNull() throws Exception {
Analyzer analyzer = new SingleCharAnalyzer();
//http://issues.apache.org/jira/browse/LUCENE-38
- Query query = new TermRangeQuery("content", null, "C",
+ Query query = TermRangeQuery.newStringRange("content", null, "C",
false, false);
initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
IndexSearcher searcher = new IndexSearcher(dir, true);
@@ -396,7 +308,7 @@ public class TestTermRangeQuery extends
public void testInclusiveLowerNull() throws Exception {
//http://issues.apache.org/jira/browse/LUCENE-38
Analyzer analyzer = new SingleCharAnalyzer();
- Query query = new TermRangeQuery("content", null, "C", true, true);
+ Query query = TermRangeQuery.newStringRange("content", null, "C", true, true);
initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
IndexSearcher searcher = new IndexSearcher(dir, true);
int numHits = searcher.search(query, null, 1000).totalHits;
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java Mon Feb 28 05:15:50 2011
@@ -17,6 +17,10 @@ package org.apache.lucene.util;
* limitations under the License.
*/
+/**
+ * @deprecated Remove when IndexableBinaryStringTools is removed.
+ */
+@Deprecated
public class TestIndexableBinaryStringTools extends LuceneTestCase {
private static final int NUM_RANDOM_TESTS = 2000 * RANDOM_MULTIPLIER;
private static final int MAX_RANDOM_BINARY_LENGTH = 300 * RANDOM_MULTIPLIER;
Modified: lucene/dev/trunk/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/CHANGES.txt?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/analysis/CHANGES.txt Mon Feb 28 05:15:50 2011
@@ -25,6 +25,10 @@ API Changes
* LUCENE-1370: Added ShingleFilter option to output unigrams if no shingles
can be generated. (Chris Harris via Steven Rowe)
+ * LUCENE-2514, LUCENE-2551: JDK and ICU CollationKeyAnalyzers were changed to
+ use pure byte keys when Version >= 4.0. This cuts sort key size approximately
+ in half. (Robert Muir)
+
New Features
* LUCENE-2413: Consolidated Solr analysis components into common.
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Mon Feb 28 05:15:50 2011
@@ -29,8 +29,8 @@ import org.apache.lucene.util.AttributeS
* Emits the entire input as a single token.
*/
public final class KeywordTokenizer extends Tokenizer {
-
- private static final int DEFAULT_BUFFER_SIZE = 256;
+ /** Default read buffer size */
+ public static final int DEFAULT_BUFFER_SIZE = 256;
private boolean done = false;
private int finalOffset;
Added: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java?rev=1075210&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java Mon Feb 28 05:15:50 2011
@@ -0,0 +1,103 @@
+package org.apache.lucene.collation;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.text.Collator;
+
+import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * <p>
+ * Converts each token into its {@link java.text.CollationKey}, and then
+ * encodes the bytes as an index term.
+ * </p>
+ * <p>
+ * <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+ * index and query time -- CollationKeys are only comparable when produced by
+ * the same Collator. Since {@link java.text.RuleBasedCollator}s are not
+ * independently versioned, it is unsafe to search against stored
+ * CollationKeys unless the following are exactly the same (best practice is
+ * to store this information with the index and check that they remain the
+ * same at query time):
+ * </p>
+ * <ol>
+ * <li>JVM vendor</li>
+ * <li>JVM version, including patch version</li>
+ * <li>
+ * The language (and country and variant, if specified) of the Locale
+ * used when constructing the collator via
+ * {@link Collator#getInstance(java.util.Locale)}.
+ * </li>
+ * <li>
+ * The collation strength used - see {@link Collator#setStrength(int)}
+ * </li>
+ * </ol>
+ * <p>
+ * The <code>ICUCollationAttributeFactory</code> in the icu package of Lucene's
+ * contrib area uses ICU4J's Collator, which makes its
+ * version available, thus allowing collation to be versioned independently
+ * from the JVM. ICUCollationAttributeFactory is also significantly faster and
+ * generates significantly shorter keys than CollationAttributeFactory. See
+ * <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+ * >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+ * generation timing and key length comparisons between ICU4J and
+ * java.text.Collator over several languages.
+ * </p>
+ * <p>
+ * CollationKeys generated by java.text.Collators are not compatible
+ * with those generated by ICU Collators. Specifically, if you use
+ * CollationAttributeFactory to generate index terms, do not use
+ * ICUCollationAttributeFactory on the query side, or vice versa.
+ * </p>
+ */
+public class CollationAttributeFactory extends AttributeSource.AttributeFactory {
+ private final Collator collator;
+ private final AttributeSource.AttributeFactory delegate;
+
+ /**
+ * Create a CollationAttributeFactory, using
+ * {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the
+ * factory for all other attributes.
+ * @param collator CollationKey generator
+ */
+ public CollationAttributeFactory(Collator collator) {
+ this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator);
+ }
+
+ /**
+ * Create a CollationAttributeFactory, using the supplied Attribute Factory
+ * as the factory for all other attributes.
+ * @param delegate Attribute Factory
+ * @param collator CollationKey generator
+ */
+ public CollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) {
+ this.delegate = delegate;
+ this.collator = collator;
+ }
+
+ @Override
+ public AttributeImpl createAttributeInstance(
+ Class<? extends Attribute> attClass) {
+ return attClass.isAssignableFrom(CollatedTermAttributeImpl.class)
+ ? new CollatedTermAttributeImpl(collator)
+ : delegate.createAttributeInstance(attClass);
+ }
+}
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java Mon Feb 28 05:15:50 2011
@@ -18,14 +18,13 @@ package org.apache.lucene.collation;
*/
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.util.IndexableBinaryStringTools; // javadoc @link
+import org.apache.lucene.util.Version;
import java.text.Collator;
import java.io.Reader;
-import java.io.IOException;
/**
* <p>
@@ -33,8 +32,8 @@ import java.io.IOException;
* </p>
* <p>
* Converts the token into its {@link java.text.CollationKey}, and then
- * encodes the CollationKey with
- * {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow
+ * encodes the CollationKey either directly or with
+ * {@link IndexableBinaryStringTools} (see <a href="#version">below</a>), to allow
* it to be stored as an index term.
* </p>
* <p>
@@ -75,39 +74,49 @@ import java.io.IOException;
* CollationKeyAnalyzer to generate index terms, do not use
* ICUCollationKeyAnalyzer on the query side, or vice versa.
* </p>
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating CollationKeyAnalyzer:
+ * <ul>
+ * <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
+ * versions will encode the bytes with {@link IndexableBinaryStringTools}.
+ * </ul>
*/
-public final class CollationKeyAnalyzer extends Analyzer {
- private Collator collator;
-
- public CollationKeyAnalyzer(Collator collator) {
+public final class CollationKeyAnalyzer extends ReusableAnalyzerBase {
+ private final Collator collator;
+ private final CollationAttributeFactory factory;
+ private final Version matchVersion;
+
+ /**
+ * Create a new CollationKeyAnalyzer, using the specified collator.
+ *
+ * @param matchVersion See <a href="#version">above</a>
+ * @param collator CollationKey generator
+ */
+ public CollationKeyAnalyzer(Version matchVersion, Collator collator) {
+ this.matchVersion = matchVersion;
this.collator = collator;
- }
-
- @Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new KeywordTokenizer(reader);
- result = new CollationKeyFilter(result, collator);
- return result;
+ this.factory = new CollationAttributeFactory(collator);
}
- private class SavedStreams {
- Tokenizer source;
- TokenStream result;
+ /**
+ * @deprecated Use {@link CollationKeyAnalyzer#CollationKeyAnalyzer(Version, Collator)}
+ * and specify a version instead. This ctor will be removed in Lucene 5.0
+ */
+ @Deprecated
+ public CollationKeyAnalyzer(Collator collator) {
+ this(Version.LUCENE_31, collator);
}
-
+
@Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader)
- throws IOException {
-
- SavedStreams streams = (SavedStreams)getPreviousTokenStream();
- if (streams == null) {
- streams = new SavedStreams();
- streams.source = new KeywordTokenizer(reader);
- streams.result = new CollationKeyFilter(streams.source, collator);
- setPreviousTokenStream(streams);
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ if (matchVersion.onOrAfter(Version.LUCENE_40)) {
+ KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
+ return new TokenStreamComponents(tokenizer, tokenizer);
} else {
- streams.source.reset(reader);
+ KeywordTokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new CollationKeyFilter(tokenizer, collator));
}
- return streams.result;
}
}
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java Mon Feb 28 05:15:50 2011
@@ -71,7 +71,10 @@ import java.text.Collator;
* CollationKeyFilter to generate index terms, do not use
* ICUCollationKeyFilter on the query side, or vice versa.
* </p>
+ * @deprecated Use {@link CollationAttributeFactory} instead, which encodes
+ * terms directly as bytes. This filter will be removed in Lucene 5.0
*/
+@Deprecated
public final class CollationKeyFilter extends TokenFilter {
private final Collator collator;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/package.html?rev=1075210&r1=1075209&r2=1075210&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/package.html (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/collation/package.html Mon Feb 28 05:15:50 2011
@@ -55,10 +55,9 @@
<code><pre>
// "fa" Locale is not supported by Sun JDK 1.4 or 1.5
Collator collator = Collator.getInstance(new Locale("ar"));
- CollationKeyAnalyzer analyzer = new CollationKeyAnalyzer(collator);
+ CollationKeyAnalyzer analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
RAMDirectory ramDir = new RAMDirectory();
- IndexWriter writer = new IndexWriter
- (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+ IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
Document doc = new Document();
doc.add(new Field("content", "\u0633\u0627\u0628",
Field.Store.YES, Field.Index.ANALYZED));
@@ -66,12 +65,9 @@
writer.close();
IndexSearcher is = new IndexSearcher(ramDir, true);
- // The AnalyzingQueryParser in Lucene's contrib allows terms in range queries
- // to be passed through an analyzer - Lucene's standard QueryParser does not
- // allow this.
- AnalyzingQueryParser aqp = new AnalyzingQueryParser("content", analyzer);
- aqp.setLowercaseExpandedTerms(false);
-
+ QueryParser aqp = new QueryParser(Version.LUCENE_40, "content", analyzer);
+ aqp.setAnalyzeRangeTerms(true);
+
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
// indexed Term above should NOT be returned by a ConstantScoreRangeQuery
@@ -85,10 +81,9 @@
<h3>Danish Sorting</h3>
<code><pre>
Analyzer analyzer
- = new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
+ = new CollationKeyAnalyzer(Version.LUCENE_40, Collator.getInstance(new Locale("da", "dk")));
RAMDirectory indexStore = new RAMDirectory();
- IndexWriter writer = new IndexWriter
- (indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+ IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(Version.LUCENE_40, analyzer));
String[] tracer = new String[] { "A", "B", "C", "D", "E" };
String[] data = new String[] { "HAT", "HUT", "H\u00C5T", "H\u00D8T", "HOT" };
String[] sortedTracerOrder = new String[] { "A", "E", "B", "D", "C" };
@@ -99,7 +94,7 @@
writer.addDocument(doc);
}
writer.close();
- Searcher searcher = new IndexSearcher(indexStore, true);
+ IndexSearcher searcher = new IndexSearcher(indexStore, true);
Sort sort = new Sort();
sort.setSort(new SortField("contents", SortField.STRING));
Query query = new MatchAllDocsQuery();
@@ -114,16 +109,15 @@
<code><pre>
Collator collator = Collator.getInstance(new Locale("tr", "TR"));
collator.setStrength(Collator.PRIMARY);
- Analyzer analyzer = new CollationKeyAnalyzer(collator);
+ Analyzer analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
RAMDirectory ramDir = new RAMDirectory();
- IndexWriter writer = new IndexWriter
- (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+ IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
Document doc = new Document();
doc.add(new Field("contents", "DIGY", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
writer.close();
IndexSearcher is = new IndexSearcher(ramDir, true);
- QueryParser parser = new QueryParser("contents", analyzer);
+ QueryParser parser = new QueryParser(Version.LUCENE_40, "contents", analyzer);
Query query = parser.parse("d\u0131gy"); // U+0131: dotless i
ScoreDoc[] result = is.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, result.length);