You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by yo...@apache.org on 2006/04/06 06:02:10 UTC

svn commit: r391895 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/search/FieldCacheImpl.java src/java/org/apache/lucene/search/FieldSortedHitQueue.java src/test/org/apache/lucene/search/TestSort.java

Author: yonik
Date: Wed Apr  5 21:02:09 2006
New Revision: 391895

URL: http://svn.apache.org/viewcvs?rev=391895&view=rev
Log:
FieldSortedHitQueue - subsequent String sorts with different locales sort identically: LUCENE-526

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java
    lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=391895&r1=391894&r2=391895&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Apr  5 21:02:09 2006
@@ -34,6 +34,9 @@
     that sometimes caused the index order of documents to change.
     (Yonik Seeley)
 
+ 7. LUCENE-526: Fixed a bug in FieldSortedHitQueue that caused
+    subsequent String sorts with different locales to sort identically.
+    (Paul Cowan via Yonik Seeley)
 
 1.9.1
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=391895&r1=391894&r2=391895&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java Wed Apr  5 21:02:09 2006
@@ -23,6 +23,7 @@
 import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ
 
 import java.io.IOException;
+import java.util.Locale;
 import java.util.Map;
 import java.util.WeakHashMap;
 import java.util.HashMap;
@@ -45,12 +46,14 @@
     final String field;        // which Field
     final int type;            // which SortField type
     final Object custom;       // which custom comparator
+    final Locale locale;       // the locale we're sorting (if string)
 
     /** Creates one of these objects. */
-    Entry (String field, int type) {
+    Entry (String field, int type, Locale locale) {
       this.field = field.intern();
       this.type = type;
       this.custom = null;
+      this.locale = locale;
     }
 
     /** Creates one of these objects for a custom comparator. */
@@ -58,6 +61,7 @@
       this.field = field.intern();
       this.type = SortField.CUSTOM;
       this.custom = custom;
+      this.locale = null;
     }
 
     /** Two of these are equal iff they reference the same field and type. */
@@ -65,10 +69,12 @@
       if (o instanceof Entry) {
         Entry other = (Entry) o;
         if (other.field == field && other.type == type) {
-          if (other.custom == null) {
-            if (custom == null) return true;
-          } else if (other.custom.equals (custom)) {
-            return true;
+          if (other.locale == null ? locale == null : other.locale.equals(locale)) {
+            if (other.custom == null) {
+              if (custom == null) return true;
+            } else if (other.custom.equals (custom)) {
+              return true;
+            }
           }
         }
       }
@@ -77,7 +83,7 @@
 
     /** Composes a hashcode based on the field and type. */
     public int hashCode() {
-      return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode());
+      return field.hashCode() ^ type ^ (custom==null ? 0 : custom.hashCode()) ^ (locale==null ? 0 : locale.hashCode());
     }
   }
 
@@ -97,8 +103,8 @@
   final Map cache = new WeakHashMap();
 
   /** See if an object is in the cache. */
-  Object lookup (IndexReader reader, String field, int type) {
-    Entry entry = new Entry (field, type);
+  Object lookup (IndexReader reader, String field, int type, Locale locale) {
+    Entry entry = new Entry (field, type, locale);
     synchronized (this) {
       HashMap readerCache = (HashMap)cache.get(reader);
       if (readerCache == null) return null;
@@ -117,8 +123,8 @@
   }
 
   /** Put an object into the cache. */
-  Object store (IndexReader reader, String field, int type, Object value) {
-    Entry entry = new Entry (field, type);
+  Object store (IndexReader reader, String field, int type, Locale locale, Object value) {
+    Entry entry = new Entry (field, type, locale);
     synchronized (this) {
       HashMap readerCache = (HashMap)cache.get(reader);
       if (readerCache == null) {
@@ -215,7 +221,7 @@
   public String[] getStrings (IndexReader reader, String field)
   throws IOException {
     field = field.intern();
-    Object ret = lookup (reader, field, SortField.STRING);
+    Object ret = lookup (reader, field, SortField.STRING, null);
     if (ret == null) {
       final String[] retArray = new String[reader.maxDoc()];
       TermDocs termDocs = reader.termDocs();
@@ -234,7 +240,7 @@
         termDocs.close();
         termEnum.close();
       }
-      store (reader, field, SortField.STRING, retArray);
+      store (reader, field, SortField.STRING, null, retArray);
       return retArray;
     }
     return (String[]) ret;
@@ -244,7 +250,7 @@
   public StringIndex getStringIndex (IndexReader reader, String field)
   throws IOException {
     field = field.intern();
-    Object ret = lookup (reader, field, STRING_INDEX);
+    Object ret = lookup (reader, field, STRING_INDEX, null);
     if (ret == null) {
       final int[] retArray = new int[reader.maxDoc()];
       String[] mterms = new String[reader.maxDoc()+1];
@@ -295,7 +301,7 @@
       }
 
       StringIndex value = new StringIndex (retArray, mterms);
-      store (reader, field, STRING_INDEX, value);
+      store (reader, field, STRING_INDEX, null, value);
       return value;
     }
     return (StringIndex) ret;
@@ -316,7 +322,7 @@
   public Object getAuto (IndexReader reader, String field)
   throws IOException {
     field = field.intern();
-    Object ret = lookup (reader, field, SortField.AUTO);
+    Object ret = lookup (reader, field, SortField.AUTO, null);
     if (ret == null) {
       TermEnum enumerator = reader.terms (new Term (field, ""));
       try {
@@ -350,7 +356,7 @@
             }
           }
           if (ret != null) {
-            store (reader, field, SortField.AUTO, ret);
+            store (reader, field, SortField.AUTO, null, ret);
           }
         } else {
           throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed");

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java?rev=391895&r1=391894&r2=391895&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java Wed Apr  5 21:02:09 2006
@@ -56,7 +56,12 @@
     for (int i=0; i<n; ++i) {
       String fieldname = fields[i].getField();
       comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getLocale(), fields[i].getFactory());
-      this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
+      
+      if (comparators[i].sortType() == SortField.STRING) {
+    	  this.fields[i] = new SortField (fieldname, fields[i].getLocale(), fields[i].getReverse());
+      } else {
+    	  this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
+      }
     }
     initialize (size);
   }
@@ -147,10 +152,10 @@
   static final Map Comparators = new WeakHashMap();
 
   /** Returns a comparator if it is in the cache. */
-  static ScoreDocComparator lookup (IndexReader reader, String field, int type, Object factory) {
+  static ScoreDocComparator lookup (IndexReader reader, String field, int type, Locale locale, Object factory) {
     FieldCacheImpl.Entry entry = (factory != null)
       ? new FieldCacheImpl.Entry (field, factory)
-      : new FieldCacheImpl.Entry (field, type);
+      : new FieldCacheImpl.Entry (field, type, locale);
     synchronized (Comparators) {
       HashMap readerCache = (HashMap)Comparators.get(reader);
       if (readerCache == null) return null;
@@ -159,10 +164,10 @@
   }
 
   /** Stores a comparator into the cache. */
-  static Object store (IndexReader reader, String field, int type, Object factory, Object value) {
+  static Object store (IndexReader reader, String field, int type, Locale locale, Object factory, Object value) {
     FieldCacheImpl.Entry entry = (factory != null)
       ? new FieldCacheImpl.Entry (field, factory)
-      : new FieldCacheImpl.Entry (field, type);
+      : new FieldCacheImpl.Entry (field, type, locale);
     synchronized (Comparators) {
       HashMap readerCache = (HashMap)Comparators.get(reader);
       if (readerCache == null) {
@@ -177,7 +182,7 @@
   throws IOException {
     if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
     if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
-    ScoreDocComparator comparator = lookup (reader, fieldname, type, factory);
+    ScoreDocComparator comparator = lookup (reader, fieldname, type, locale, factory);
     if (comparator == null) {
       switch (type) {
         case SortField.AUTO:
@@ -199,7 +204,7 @@
         default:
           throw new RuntimeException ("unknown field type: "+type);
       }
-      store (reader, fieldname, type, factory, comparator);
+      store (reader, fieldname, type, locale, factory, comparator);
     }
     return comparator;
   }

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java?rev=391895&r1=391894&r2=391895&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestSort.java Wed Apr  5 21:02:09 2006
@@ -94,22 +94,23 @@
 	// the int field to sort by int
 	// the float field to sort by float
 	// the string field to sort by string
+    // the i18n field includes accented characters for testing locale-specific sorting
 	private String[][] data = new String[][] {
-	// tracer  contents         int            float           string   custom
-	{   "A",   "x a",           "5",           "4f",           "c",     "A-3"   },
-	{   "B",   "y a",           "5",           "3.4028235E38", "i",     "B-10"  },
-	{   "C",   "x a b c",       "2147483647",  "1.0",          "j",     "A-2"   },
-	{   "D",   "y a b c",       "-1",          "0.0f",         "a",     "C-0"   },
-	{   "E",   "x a b c d",     "5",           "2f",           "h",     "B-8"   },
-	{   "F",   "y a b c d",     "2",           "3.14159f",     "g",     "B-1"   },
-	{   "G",   "x a b c d",     "3",           "-1.0",         "f",     "C-100" },
-	{   "H",   "y a b c d",     "0",           "1.4E-45",      "e",     "C-88"  },
-	{   "I",   "x a b c d e f", "-2147483648", "1.0e+0",       "d",     "A-10"  },
-	{   "J",   "y a b c d e f", "4",           ".5",           "b",     "C-7"   },
-	{   "W",   "g",             "1",           null,           null,    null    },
-	{   "X",   "g",             "1",           "0.1",          null,    null    },
-	{   "Y",   "g",             "1",           "0.2",          null,    null    },
-	{   "Z",   "f g",           null,          null,           null,    null    }
+	// tracer  contents         int            float           string   custom   i18n
+	{   "A",   "x a",           "5",           "4f",           "c",     "A-3",   "p\u00EAche"},
+	{   "B",   "y a",           "5",           "3.4028235E38", "i",     "B-10",  "HAT"},
+	{   "C",   "x a b c",       "2147483647",  "1.0",          "j",     "A-2",   "p\u00E9ch\u00E9"},
+	{   "D",   "y a b c",       "-1",          "0.0f",         "a",     "C-0",   "HUT"},
+	{   "E",   "x a b c d",     "5",           "2f",           "h",     "B-8",   "peach"},
+	{   "F",   "y a b c d",     "2",           "3.14159f",     "g",     "B-1",   "H\u00C5T"},
+	{   "G",   "x a b c d",     "3",           "-1.0",         "f",     "C-100", "sin"},
+	{   "H",   "y a b c d",     "0",           "1.4E-45",      "e",     "C-88",  "H\u00D8T"},
+	{   "I",   "x a b c d e f", "-2147483648", "1.0e+0",       "d",     "A-10",  "s\u00EDn"},
+	{   "J",   "y a b c d e f", "4",           ".5",           "b",     "C-7",   "HOT"},
+	{   "W",   "g",             "1",           null,           null,    null,    null},
+	{   "X",   "g",             "1",           "0.1",          null,    null,    null},
+	{   "Y",   "g",             "1",           "0.2",          null,    null,    null},
+	{   "Z",   "f g",           null,          null,           null,    null,    null}
 	};
 
 	// create an index of all the documents, or just the x, or just the y documents
@@ -126,6 +127,7 @@
 				if (data[i][3] != null) doc.add (new Field ("float",    data[i][3], Field.Store.NO, Field.Index.UN_TOKENIZED));
 				if (data[i][4] != null) doc.add (new Field ("string",   data[i][4], Field.Store.NO, Field.Index.UN_TOKENIZED));
 				if (data[i][5] != null) doc.add (new Field ("custom",   data[i][5], Field.Store.NO, Field.Index.UN_TOKENIZED));
+				if (data[i][6] != null) doc.add (new Field ("i18n",     data[i][6], Field.Store.NO, Field.Index.UN_TOKENIZED));
         doc.setBoost(2);  // produce some scores above 1.0
 				writer.addDocument (doc);
 			}
@@ -342,6 +344,40 @@
 		assertMatches (full, queryY, sort, "BFHJD");
 	}
 
+	// test using various international locales with accented characters
+	// (which sort differently depending on locale)
+	public void testInternationalSort() throws Exception {
+		sort.setSort (new SortField ("i18n", Locale.US));
+		assertMatches (full, queryY, sort, "BFJDH");
+
+		sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
+		assertMatches (full, queryY, sort, "BJDFH");
+
+		sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
+		assertMatches (full, queryY, sort, "BJDHF");
+
+		sort.setSort (new SortField ("i18n", Locale.US));
+		assertMatches (full, queryX, sort, "ECAGI");
+
+		sort.setSort (new SortField ("i18n", Locale.FRANCE));
+		assertMatches (full, queryX, sort, "EACGI");
+	}
+    
+    // Test the MultiSearcher's ability to preserve locale-sensitive ordering
+    // by wrapping it around a single searcher
+	public void testInternationalMultiSearcherSort() throws Exception {
+		Searcher multiSearcher = new MultiSearcher (new Searchable[] { full });
+		
+		sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
+		assertMatches (multiSearcher, queryY, sort, "BJDFH");
+		
+		sort.setSort (new SortField ("i18n", Locale.US));
+		assertMatches (multiSearcher, queryY, sort, "BFJDH");
+		
+		sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
+		assertMatches (multiSearcher, queryY, sort, "BJDHF");
+	} 
+    
 	// test a custom sort function
 	public void testCustomSorts() throws Exception {
 		sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));