You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2008/11/12 10:43:40 UTC

svn commit: r713332 - in /lucene/java/trunk/src: java/org/apache/lucene/search/ test/org/apache/lucene/search/

Author: mikemccand
Date: Wed Nov 12 01:43:39 2008
New Revision: 713332

URL: http://svn.apache.org/viewvc?rev=713332&view=rev
Log:
LUCENE-1450: make sure RangeQuery/Filter check all terms in the index when using a Collator

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java?rev=713332&r1=713331&r2=713332&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeTermEnum.java Wed Nov 12 01:43:39 2008
@@ -70,7 +70,8 @@
       this.includeUpper = true;
     }
 
-    setEnum(reader.terms(new Term(this.field, this.lowerTermText)));
+    String startTermText = collator == null ? this.lowerTermText : "";
+    setEnum(reader.terms(new Term(this.field, startTermText)));
   }
 
   public float difference() {

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java?rev=713332&r1=713331&r2=713332&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java Wed Nov 12 01:43:39 2008
@@ -564,4 +564,43 @@
     assertEquals("The index Term should be included.", 1, result.length);
     search.close();
   }
+
+  public void testDanish() throws Exception {
+
+    /* build an index */
+    RAMDirectory danishIndex = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T,
+                                         IndexWriter.MaxFieldLength.LIMITED);
+
+    // Danish collation orders the words below in the given order
+    // (example taken from TestSort.testInternationalSort() ).
+    String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
+    for (int docnum = 0 ; docnum < words.length ; ++docnum) {   
+      Document doc = new Document();
+      doc.add(new Field("content", words[docnum], 
+                        Field.Store.YES, Field.Index.UN_TOKENIZED));
+      doc.add(new Field("body", "body",
+                        Field.Store.YES, Field.Index.UN_TOKENIZED));
+      writer.addDocument(doc);
+    }
+    writer.optimize();
+    writer.close();
+
+    IndexReader reader = IndexReader.open(danishIndex);
+    IndexSearcher search = new IndexSearcher(reader);
+    Query q = new TermQuery(new Term("body","body"));
+
+    Collator c = Collator.getInstance(new Locale("da", "dk"));
+
+    // Unicode order would not include "H\u00C5T" in the exclusive range
+    // ( "H\u00D8T", "MAND" ), but Danish collation does.
+    ScoreDoc[] result = search.search
+      (csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs;
+    assertEquals("The index Term should be included.", 1, result.length);
+
+    result = search.search
+      (csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, result.length);
+    search.close();
+  }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java?rev=713332&r1=713331&r2=713332&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java Wed Nov 12 01:43:39 2008
@@ -376,4 +376,46 @@
         assertEquals("The index Term should be included.", 1, result.length());
         search.close();
     }
+
+    public void testDanish() throws Exception {
+            
+        /* build an index */
+        RAMDirectory danishIndex = new RAMDirectory();
+        IndexWriter writer = new IndexWriter
+            (danishIndex, new SimpleAnalyzer(), T, 
+             IndexWriter.MaxFieldLength.LIMITED);
+        // Danish collation orders the words below in the given order
+        // (example taken from TestSort.testInternationalSort() ).
+        String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
+        for (int docnum = 0 ; docnum < words.length ; ++docnum) {   
+            Document doc = new Document();
+            doc.add(new Field("content", words[docnum], 
+                              Field.Store.YES, Field.Index.UN_TOKENIZED));
+            doc.add(new Field("body", "body",
+                              Field.Store.YES, Field.Index.UN_TOKENIZED));
+            writer.addDocument(doc);
+        }
+        writer.optimize();
+        writer.close();
+
+        IndexReader reader = IndexReader.open(danishIndex);
+        IndexSearcher search = new IndexSearcher(reader);
+        Query q = new TermQuery(new Term("body","body"));
+
+        Collator collator = Collator.getInstance(new Locale("da", "dk"));
+        Query query = new RangeQuery
+            ("content", "H\u00D8T", "MAND", false, false, collator);
+
+        // Unicode order would not include "H\u00C5T" in the exclusive range
+        // ( "H\u00D8T", "MAND" ), but Danish collation does.
+        Hits result = search.search
+            (q, new RangeFilter("content", "H\u00D8T", "MAND", F, F, collator));
+        assertEquals("The index Term should be included.", 1, result.length());
+
+        result = search.search
+            (q, new RangeFilter("content", "H\u00C5T", "MAND", F, F, collator));
+        assertEquals
+            ("The index Term should not be included.", 0, result.length());
+        search.close();
+    }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java?rev=713332&r1=713331&r2=713332&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java Wed Nov 12 01:43:39 2008
@@ -187,6 +187,26 @@
     assertEquals("The index Term should be included.", 1, hits.length);
     searcher.close();
   }
+  
+  public void testDanish() throws Exception {
+    Collator collator = Collator.getInstance(new Locale("da", "dk"));
+    // Danish collation orders the words below in the given order (example taken
+    // from TestSort.testInternationalSort() ).
+    String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
+    Query query = new RangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
+
+    // Unicode order would not include "H\u00C5T" in the exclusive range
+    // ( "H\u00D8T", "MAND" ), but Danish collation does.
+    initializeIndex(words);
+    IndexSearcher searcher = new IndexSearcher(dir);
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("The index Term should be included.", 1, hits.length);
+
+    query = new RangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, hits.length);
+    searcher.close();
+  }
 
   private void initializeIndex(String[] values) throws IOException {
     IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);