You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by yo...@apache.org on 2006/04/06 06:02:10 UTC
svn commit: r391895 - in /lucene/java/trunk: CHANGES.txt
src/java/org/apache/lucene/search/FieldCacheImpl.java
src/java/org/apache/lucene/search/FieldSortedHitQueue.java
src/test/org/apache/lucene/search/TestSort.java
Author: yonik
Date: Wed Apr 5 21:02:09 2006
New Revision: 391895
URL: http://svn.apache.org/viewcvs?rev=391895&view=rev
Log:
FieldSortedHitQueue - subsequent String sorts with different locales sort identically: LUCENE-526
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java
lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java
lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=391895&r1=391894&r2=391895&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Apr 5 21:02:09 2006
@@ -34,6 +34,9 @@
that sometimes caused the index order of documents to change.
(Yonik Seeley)
+ 7. LUCENE-526: Fixed a bug in FieldSortedHitQueue that caused
+ subsequent String sorts with different locales to sort identically.
+ (Paul Cowan via Yonik Seeley)
1.9.1
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=391895&r1=391894&r2=391895&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java Wed Apr 5 21:02:09 2006
@@ -23,6 +23,7 @@
import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ
import java.io.IOException;
+import java.util.Locale;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.HashMap;
@@ -45,12 +46,14 @@
final String field; // which Field
final int type; // which SortField type
final Object custom; // which custom comparator
+ final Locale locale; // the locale we're sorting (if string)
/** Creates one of these objects. */
- Entry (String field, int type) {
+ Entry (String field, int type, Locale locale) {
this.field = field.intern();
this.type = type;
this.custom = null;
+ this.locale = locale;
}
/** Creates one of these objects for a custom comparator. */
@@ -58,6 +61,7 @@
this.field = field.intern();
this.type = SortField.CUSTOM;
this.custom = custom;
+ this.locale = null;
}
/** Two of these are equal iff they reference the same field and type. */
@@ -65,10 +69,12 @@
if (o instanceof Entry) {
Entry other = (Entry) o;
if (other.field == field && other.type == type) {
- if (other.custom == null) {
- if (custom == null) return true;
- } else if (other.custom.equals (custom)) {
- return true;
+ if (other.locale == null ? locale == null : other.locale.equals(locale)) {
+ if (other.custom == null) {
+ if (custom == null) return true;
+ } else if (other.custom.equals (custom)) {
+ return true;
+ }
}
}
}
@@ -77,7 +83,7 @@
/** Composes a hashcode based on the field and type. */
public int hashCode() {
- return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode());
+ return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode()) ^ (locale==null ? 0 : locale.hashCode());
}
}
@@ -97,8 +103,8 @@
final Map cache = new WeakHashMap();
/** See if an object is in the cache. */
- Object lookup (IndexReader reader, String field, int type) {
- Entry entry = new Entry (field, type);
+ Object lookup (IndexReader reader, String field, int type, Locale locale) {
+ Entry entry = new Entry (field, type, locale);
synchronized (this) {
HashMap readerCache = (HashMap)cache.get(reader);
if (readerCache == null) return null;
@@ -117,8 +123,8 @@
}
/** Put an object into the cache. */
- Object store (IndexReader reader, String field, int type, Object value) {
- Entry entry = new Entry (field, type);
+ Object store (IndexReader reader, String field, int type, Locale locale, Object value) {
+ Entry entry = new Entry (field, type, locale);
synchronized (this) {
HashMap readerCache = (HashMap)cache.get(reader);
if (readerCache == null) {
@@ -215,7 +221,7 @@
public String[] getStrings (IndexReader reader, String field)
throws IOException {
field = field.intern();
- Object ret = lookup (reader, field, SortField.STRING);
+ Object ret = lookup (reader, field, SortField.STRING, null);
if (ret == null) {
final String[] retArray = new String[reader.maxDoc()];
TermDocs termDocs = reader.termDocs();
@@ -234,7 +240,7 @@
termDocs.close();
termEnum.close();
}
- store (reader, field, SortField.STRING, retArray);
+ store (reader, field, SortField.STRING, null, retArray);
return retArray;
}
return (String[]) ret;
@@ -244,7 +250,7 @@
public StringIndex getStringIndex (IndexReader reader, String field)
throws IOException {
field = field.intern();
- Object ret = lookup (reader, field, STRING_INDEX);
+ Object ret = lookup (reader, field, STRING_INDEX, null);
if (ret == null) {
final int[] retArray = new int[reader.maxDoc()];
String[] mterms = new String[reader.maxDoc()+1];
@@ -295,7 +301,7 @@
}
StringIndex value = new StringIndex (retArray, mterms);
- store (reader, field, STRING_INDEX, value);
+ store (reader, field, STRING_INDEX, null, value);
return value;
}
return (StringIndex) ret;
@@ -316,7 +322,7 @@
public Object getAuto (IndexReader reader, String field)
throws IOException {
field = field.intern();
- Object ret = lookup (reader, field, SortField.AUTO);
+ Object ret = lookup (reader, field, SortField.AUTO, null);
if (ret == null) {
TermEnum enumerator = reader.terms (new Term (field, ""));
try {
@@ -350,7 +356,7 @@
}
}
if (ret != null) {
- store (reader, field, SortField.AUTO, ret);
+ store (reader, field, SortField.AUTO, null, ret);
}
} else {
throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed");
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java?rev=391895&r1=391894&r2=391895&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java Wed Apr 5 21:02:09 2006
@@ -56,7 +56,12 @@
for (int i=0; i<n; ++i) {
String fieldname = fields[i].getField();
comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getLocale(), fields[i].getFactory());
- this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
+
+ if (comparators[i].sortType() == SortField.STRING) {
+ this.fields[i] = new SortField (fieldname, fields[i].getLocale(), fields[i].getReverse());
+ } else {
+ this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
+ }
}
initialize (size);
}
@@ -147,10 +152,10 @@
static final Map Comparators = new WeakHashMap();
/** Returns a comparator if it is in the cache. */
- static ScoreDocComparator lookup (IndexReader reader, String field, int type, Object factory) {
+ static ScoreDocComparator lookup (IndexReader reader, String field, int type, Locale locale, Object factory) {
FieldCacheImpl.Entry entry = (factory != null)
? new FieldCacheImpl.Entry (field, factory)
- : new FieldCacheImpl.Entry (field, type);
+ : new FieldCacheImpl.Entry (field, type, locale);
synchronized (Comparators) {
HashMap readerCache = (HashMap)Comparators.get(reader);
if (readerCache == null) return null;
@@ -159,10 +164,10 @@
}
/** Stores a comparator into the cache. */
- static Object store (IndexReader reader, String field, int type, Object factory, Object value) {
+ static Object store (IndexReader reader, String field, int type, Locale locale, Object factory, Object value) {
FieldCacheImpl.Entry entry = (factory != null)
? new FieldCacheImpl.Entry (field, factory)
- : new FieldCacheImpl.Entry (field, type);
+ : new FieldCacheImpl.Entry (field, type, locale);
synchronized (Comparators) {
HashMap readerCache = (HashMap)Comparators.get(reader);
if (readerCache == null) {
@@ -177,7 +182,7 @@
throws IOException {
if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
- ScoreDocComparator comparator = lookup (reader, fieldname, type, factory);
+ ScoreDocComparator comparator = lookup (reader, fieldname, type, locale, factory);
if (comparator == null) {
switch (type) {
case SortField.AUTO:
@@ -199,7 +204,7 @@
default:
throw new RuntimeException ("unknown field type: "+type);
}
- store (reader, fieldname, type, factory, comparator);
+ store (reader, fieldname, type, locale, factory, comparator);
}
return comparator;
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java?rev=391895&r1=391894&r2=391895&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java Wed Apr 5 21:02:09 2006
@@ -94,22 +94,23 @@
// the int field to sort by int
// the float field to sort by float
// the string field to sort by string
+ // the i18n field includes accented characters for testing locale-specific sorting
private String[][] data = new String[][] {
- // tracer contents int float string custom
- { "A", "x a", "5", "4f", "c", "A-3" },
- { "B", "y a", "5", "3.4028235E38", "i", "B-10" },
- { "C", "x a b c", "2147483647", "1.0", "j", "A-2" },
- { "D", "y a b c", "-1", "0.0f", "a", "C-0" },
- { "E", "x a b c d", "5", "2f", "h", "B-8" },
- { "F", "y a b c d", "2", "3.14159f", "g", "B-1" },
- { "G", "x a b c d", "3", "-1.0", "f", "C-100" },
- { "H", "y a b c d", "0", "1.4E-45", "e", "C-88" },
- { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10" },
- { "J", "y a b c d e f", "4", ".5", "b", "C-7" },
- { "W", "g", "1", null, null, null },
- { "X", "g", "1", "0.1", null, null },
- { "Y", "g", "1", "0.2", null, null },
- { "Z", "f g", null, null, null, null }
+ // tracer contents int float string custom i18n
+ { "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche"},
+ { "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT"},
+ { "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9"},
+ { "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT"},
+ { "E", "x a b c d", "5", "2f", "h", "B-8", "peach"},
+ { "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T"},
+ { "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin"},
+ { "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T"},
+ { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn"},
+ { "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT"},
+ { "W", "g", "1", null, null, null, null},
+ { "X", "g", "1", "0.1", null, null, null},
+ { "Y", "g", "1", "0.2", null, null, null},
+ { "Z", "f g", null, null, null, null, null}
};
// create an index of all the documents, or just the x, or just the y documents
@@ -126,6 +127,7 @@
if (data[i][3] != null) doc.add (new Field ("float", data[i][3], Field.Store.NO, Field.Index.UN_TOKENIZED));
if (data[i][4] != null) doc.add (new Field ("string", data[i][4], Field.Store.NO, Field.Index.UN_TOKENIZED));
if (data[i][5] != null) doc.add (new Field ("custom", data[i][5], Field.Store.NO, Field.Index.UN_TOKENIZED));
+ if (data[i][6] != null) doc.add (new Field ("i18n", data[i][6], Field.Store.NO, Field.Index.UN_TOKENIZED));
doc.setBoost(2); // produce some scores above 1.0
writer.addDocument (doc);
}
@@ -342,6 +344,40 @@
assertMatches (full, queryY, sort, "BFHJD");
}
+ // test using various international locales with accented characters
+ // (which sort differently depending on locale)
+ public void testInternationalSort() throws Exception {
+ sort.setSort (new SortField ("i18n", Locale.US));
+ assertMatches (full, queryY, sort, "BFJDH");
+
+ sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
+ assertMatches (full, queryY, sort, "BJDFH");
+
+ sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
+ assertMatches (full, queryY, sort, "BJDHF");
+
+ sort.setSort (new SortField ("i18n", Locale.US));
+ assertMatches (full, queryX, sort, "ECAGI");
+
+ sort.setSort (new SortField ("i18n", Locale.FRANCE));
+ assertMatches (full, queryX, sort, "EACGI");
+ }
+
+ // Test the MultiSearcher's ability to preserve locale-sensitive ordering
+ // by wrapping it around a single searcher
+ public void testInternationalMultiSearcherSort() throws Exception {
+ Searcher multiSearcher = new MultiSearcher (new Searchable[] { full });
+
+ sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
+ assertMatches (multiSearcher, queryY, sort, "BJDFH");
+
+ sort.setSort (new SortField ("i18n", Locale.US));
+ assertMatches (multiSearcher, queryY, sort, "BFJDH");
+
+ sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
+ assertMatches (multiSearcher, queryY, sort, "BJDHF");
+ }
+
// test a custom sort function
public void testCustomSorts() throws Exception {
sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));