You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2008/11/12 10:43:40 UTC
svn commit: r713332 - in /lucene/java/trunk/src: java/org/apache/lucene/search/ test/org/apache/lucene/search/
Author: mikemccand
Date: Wed Nov 12 01:43:39 2008
New Revision: 713332
URL: http://svn.apache.org/viewvc?rev=713332&view=rev
Log:
LUCENE-1450: make sure RangeQuery/Filter check all terms in the index when using a Collator
Modified:
lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java
lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java
lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java?rev=713332&r1=713331&r2=713332&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java Wed Nov 12 01:43:39 2008
@@ -70,7 +70,8 @@
this.includeUpper = true;
}
- setEnum(reader.terms(new Term(this.field, this.lowerTermText)));
+ String startTermText = collator == null ? this.lowerTermText : "";
+ setEnum(reader.terms(new Term(this.field, startTermText)));
}
public float difference() {
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java?rev=713332&r1=713331&r2=713332&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java Wed Nov 12 01:43:39 2008
@@ -564,4 +564,43 @@
assertEquals("The index Term should be included.", 1, result.length);
search.close();
}
+
+ public void testDanish() throws Exception {
+
+ /* build an index */
+ RAMDirectory danishIndex = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T,
+ IndexWriter.MaxFieldLength.LIMITED);
+
+ // Danish collation orders the words below in the given order
+ // (example taken from TestSort.testInternationalSort() ).
+ String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
+ for (int docnum = 0 ; docnum < words.length ; ++docnum) {
+ Document doc = new Document();
+ doc.add(new Field("content", words[docnum],
+ Field.Store.YES, Field.Index.UN_TOKENIZED));
+ doc.add(new Field("body", "body",
+ Field.Store.YES, Field.Index.UN_TOKENIZED));
+ writer.addDocument(doc);
+ }
+ writer.optimize();
+ writer.close();
+
+ IndexReader reader = IndexReader.open(danishIndex);
+ IndexSearcher search = new IndexSearcher(reader);
+ Query q = new TermQuery(new Term("body","body"));
+
+ Collator c = Collator.getInstance(new Locale("da", "dk"));
+
+ // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
+ // but Danish collation does.
+ ScoreDoc[] result = search.search
+ (csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs;
+ assertEquals("The index Term should be included.", 1, result.length);
+
+ result = search.search
+ (csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs;
+ assertEquals("The index Term should not be included.", 0, result.length);
+ search.close();
+ }
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java?rev=713332&r1=713331&r2=713332&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java Wed Nov 12 01:43:39 2008
@@ -376,4 +376,46 @@
assertEquals("The index Term should be included.", 1, result.length());
search.close();
}
+
+ public void testDanish() throws Exception {
+
+ /* build an index */
+ RAMDirectory danishIndex = new RAMDirectory();
+ IndexWriter writer = new IndexWriter
+ (danishIndex, new SimpleAnalyzer(), T,
+ IndexWriter.MaxFieldLength.LIMITED);
+ // Danish collation orders the words below in the given order
+ // (example taken from TestSort.testInternationalSort() ).
+ String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
+ for (int docnum = 0 ; docnum < words.length ; ++docnum) {
+ Document doc = new Document();
+ doc.add(new Field("content", words[docnum],
+ Field.Store.YES, Field.Index.UN_TOKENIZED));
+ doc.add(new Field("body", "body",
+ Field.Store.YES, Field.Index.UN_TOKENIZED));
+ writer.addDocument(doc);
+ }
+ writer.optimize();
+ writer.close();
+
+ IndexReader reader = IndexReader.open(danishIndex);
+ IndexSearcher search = new IndexSearcher(reader);
+ Query q = new TermQuery(new Term("body","body"));
+
+ Collator collator = Collator.getInstance(new Locale("da", "dk"));
+ Query query = new RangeQuery
+ ("content", "H\u00D8T", "MAND", false, false, collator);
+
+ // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
+ // but Danish collation does.
+ Hits result = search.search
+ (q, new RangeFilter("content", "H\u00D8T", "MAND", F, F, collator));
+ assertEquals("The index Term should be included.", 1, result.length());
+
+ result = search.search
+ (q, new RangeFilter("content", "H\u00C5T", "MAND", F, F, collator));
+ assertEquals
+ ("The index Term should not be included.", 0, result.length());
+ search.close();
+ }
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java?rev=713332&r1=713331&r2=713332&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java Wed Nov 12 01:43:39 2008
@@ -187,6 +187,26 @@
assertEquals("The index Term should be included.", 1, hits.length);
searcher.close();
}
+
+ public void testDanish() throws Exception {
+ Collator collator = Collator.getInstance(new Locale("da", "dk"));
+ // Danish collation orders the words below in the given order (example taken
+ // from TestSort.testInternationalSort() ).
+ String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
+ Query query = new RangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
+
+ // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
+ // but Danish collation does.
+ initializeIndex(words);
+ IndexSearcher searcher = new IndexSearcher(dir);
+ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+ assertEquals("The index Term should be included.", 1, hits.length);
+
+ query = new RangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
+ hits = searcher.search(query, null, 1000).scoreDocs;
+ assertEquals("The index Term should not be included.", 0, hits.length);
+ searcher.close();
+ }
private void initializeIndex(String[] values) throws IOException {
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);