You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by va...@apache.org on 2009/09/29 23:55:59 UTC

svn commit: r820102 [2/2] - in /lucene/pylucene/trunk: ./ java/org/apache/pylucene/search/ java/org/apache/pylucene/search/highlight/ jcc/ jcc/_jcc/java/lang/ jcc/jcc/ jcc/jcc/sources/ samples/LuceneInAction/lia/searching/ test/

Modified: lucene/pylucene/trunk/test/test_Sort.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_Sort.py?rev=820102&r1=820101&r2=820102&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_Sort.py (original)
+++ lucene/pylucene/trunk/test/test_Sort.py Tue Sep 29 21:55:57 2009
@@ -12,286 +12,156 @@
 #   limitations under the License.
 # ====================================================================
 
-import re
+import math
 
+from itertools import izip
+from random import randint
 from unittest import TestCase, main
 from lucene import *
 
+NUM_STRINGS = 6000
+
+
 
 class SortTestCase(TestCase):
     """
     Unit tests for sorting code, ported from Java Lucene
     """
 
-    # document data:
-    # the tracer field is used to determine which document was hit
-    # the contents field is used to search and sort by relevance
-    # the int field to sort by int
-    # the float field to sort by float
-    # the string field to sort by string
-
-    data = [
-    #     tracer  contents          int           float          string  custom
-        [   "A",   "x a",           "5",           "4f",           "c",   "A-3"   ],
-        [   "B",   "y a",           "5",           "3.4028235E38", "i",   "B-10"  ],
-        [   "C",   "x a b c",       "2147483647",  "1.0",          "j",   "A-2"   ],
-        [   "D",   "y a b c",       "-1",          "0.0f",         "a",   "C-0"   ],
-        [   "E",   "x a b c d",     "5",           "2f",           "h",   "B-8"   ],
-        [   "F",   "y a b c d",     "2",           "3.14159f",     "g",   "B-1"   ],
-        [   "G",   "x a b c d",     "3",           "-1.0",         "f",   "C-100" ],
-        [   "H",   "y a b c d",     "0",           "1.4E-45",      "e",   "C-88"  ],
-        [   "I",   "x a b c d e f", "-2147483648", "1.0e+0",       "d",   "A-10"  ],
-        [   "J",   "y a b c d e f", "4",           ".5",           "b",   "C-7"   ],
-	[   "W",   "g",             "1",           None,           None,  None    ],
-	[   "X",   "g",             "1",           "0.1",          None,  None    ],
-	[   "Y",   "g",             "1",           "0.2",          None,  None    ],
-	[   "Z",   "f g",           None,          None,           None,  None    ]
-        ]
+    def __init__(self, *args, **kwds):
 
+        super(SortTestCase, self).__init__(*args, **kwds)
+
+        self.data = [
+    #      tracer  contents         int            float           string   custom   i18n               long                  double                 short                 byte            custom parser encoding
+        [   "A",   "x a",           "5",           "4f",           "c",    "A-3",   u"p\u00EAche",      "10",                  "-4.0",                "3",                  "126",          "J"  ],
+        [   "B",   "y a",           "5",           "3.4028235E38", "i",    "B-10",  "HAT",             "1000000000",          "40.0",                "24",                 "1",            "I"  ],
+        [   "C",   "x a b c",       "2147483647",  "1.0",          "j",    "A-2",   u"p\u00E9ch\u00E9", "99999999",            "40.00002343",         "125",                "15",           "H"  ],
+        [   "D",   "y a b c",       "-1",          "0.0f",         "a",     "C-0",   "HUT",             str(Long.MAX_VALUE),  str(Double.MIN_VALUE), str(Short.MIN_VALUE), str(Byte.MIN_VALUE), "G"  ],
+        [   "E",   "x a b c d",     "5",           "2f",           "h",     "B-8",   "peach",           str(Long.MIN_VALUE),  str(Double.MAX_VALUE), str(Short.MAX_VALUE), str(Byte.MAX_VALUE), "F"  ],
+        [   "F",   "y a b c d",     "2",           "3.14159f",     "g",     "B-1",   u"H\u00C5T",        "-44",                "343.034435444",       "-3",                 "0",            "E"  ],
+        [   "G",   "x a b c d",     "3",           "-1.0",         "f",     "C-100", "sin",             "323254543543",       "4.043544",            "5",                  "100",          "D"  ],
+        [   "H",   "y a b c d",     "0",           "1.4E-45",      "e",     "C-88",  u"H\u00D8T",        "1023423423005",      "4.043545",            "10",                 "-50",          "C"  ],
+        [   "I",   "x a b c d e f", "-2147483648", "1.0e+0",       "d",     "A-10",  u"s\u00EDn",        "332422459999",       "4.043546",            "-340",               "51",           "B"  ],
+        [   "J",   "y a b c d e f", "4",           ".5",           "b",     "C-7",   "HOT",             "34334543543",        "4.0000220343",        "300",                "2",            "A"  ],
+        [   "W",   "g",             "1",           None,           None,    None,    None,              None,                 None,                  None,                 None,           None  ],
+        [   "X",   "g",             "1",           "0.1",          None,    None,    None,              None,                 None,                  None,                 None,           None  ],
+        [   "Y",   "g",             "1",           "0.2",          None,    None,    None,              None,                 None,                  None,                 None,           None  ],
+        [   "Z",   "f g",           None,          None,           None,    None,    None,              None,                 None,                  None,                 None,           None  ],
+        ]
 
     def _getIndex(self, even, odd):
-        """
-        Create an index of all the documents, or just the x,
-        or just the y documents
-        """
-        
+
         indexStore = RAMDirectory()
-        writer = IndexWriter(indexStore, SimpleAnalyzer(), True)
+        writer = IndexWriter(indexStore, SimpleAnalyzer(), True,
+                             IndexWriter.MaxFieldLength.LIMITED)
+        writer.setMaxBufferedDocs(2)
+        writer.setMergeFactor(1000)
 
-        for i in xrange(0, len(self.data)):
-            if i % 2 == 0 and even or i % 2 == 1 and odd:
+        for i in xrange(len(self.data)):
+            if (i % 2 == 0 and even) or (i % 2 == 1 and odd):
                 doc = Document()
-                doc.add(Field("tracer", self.data[i][0],
-                              Field.Store.YES, Field.Index.NO))
-                doc.add(Field("contents", self.data[i][1],
-                              Field.Store.NO, Field.Index.TOKENIZED))
+                doc.add(Field("tracer", self.data[i][0], Field.Store.YES,
+                              Field.Index.NO))
+                doc.add(Field("contents", self.data[i][1], Field.Store.NO,
+                              Field.Index.ANALYZED))
                 if self.data[i][2] is not None:
-                    doc.add(Field("int", self.data[i][2],
-                                  Field.Store.NO, Field.Index.UN_TOKENIZED))
+                    doc.add(Field("int", self.data[i][2], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
                 if self.data[i][3] is not None:
-                    doc.add(Field("float", self.data[i][3],
-                                  Field.Store.NO, Field.Index.UN_TOKENIZED))
+                    doc.add(Field("float", self.data[i][3], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
                 if self.data[i][4] is not None:
-                    doc.add(Field("string", self.data[i][4],
-                                  Field.Store.NO, Field.Index.UN_TOKENIZED))
+                    doc.add(Field("string", self.data[i][4], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
                 if self.data[i][5] is not None:
-                    doc.add(Field("custom", self.data[i][5],
-                                  Field.Store.NO, Field.Index.UN_TOKENIZED))
+                    doc.add(Field("custom", self.data[i][5], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
+                if self.data[i][6] is not None:
+                    doc.add(Field("i18n", self.data[i][6], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
+                if self.data[i][7] is not None:
+                    doc.add(Field("long", self.data[i][7], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
+                if self.data[i][8] is not None:
+                    doc.add(Field("double", self.data[i][8], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
+                if self.data[i][9] is not None:
+                    doc.add(Field("short", self.data[i][9], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
+                if self.data[i][10] is not None:
+                    doc.add(Field("byte", self.data[i][10], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
+                if self.data[i][11] is not None:
+                    doc.add(Field("parser", self.data[i][11], Field.Store.NO,
+                                  Field.Index.NOT_ANALYZED))
+                doc.setBoost(2.0)  # produce some scores above 1.0
                 writer.addDocument(doc)
-
-        writer.optimize()
+        # writer.optimize()
         writer.close()
+        s = IndexSearcher(indexStore)
+        s.setDefaultFieldSortScoring(True, True)
 
-        return IndexSearcher(indexStore)
+        return s
 
     def _getFullIndex(self):
         return self._getIndex(True, True)
 
-    def _getXIndex(self):
-        return self._getIndex(True, False)
-
-    def _getYIndex(self):
-        return self._getIndex(False, True)
-
-    def _getEmptyIndex(self):
-        return self._getIndex(False, False)
+    def getFullStrings(self):
 
-    def _assertMatches(self, searcher, query, sort, expectedResult):
-        """
-        Make sure the documents returned by the search match the expected list
-        """
+        indexStore = RAMDirectory()
+        writer = IndexWriter(indexStore, SimpleAnalyzer(), True,
+                             IndexWriter.MaxFieldLength.LIMITED)
+        writer.setMaxBufferedDocs(4)
+        writer.setMergeFactor(97)
         
-        buff = ''.join([''.join(Hit.cast_(hit).getDocument().getValues("tracer"))
-                        for hit in searcher.search(query, sort)])
-
-        self.assertEqual(expectedResult, buff)
-
-    def _assertMatchesPattern(self, searcher, query, sort, pattern):
-        """
-        make sure the documents returned by the search match the expected
-        list pattern
-        """
-
-        buff = ''.join([''.join(Hit.cast_(hit).getDocument().getValues("tracer"))
-                        for hit in searcher.search(query, sort)])
-
-        self.assert_(re.compile(pattern).match(buff))
-
-    def _getComparatorSource(self):
-        return self._getComparator()
-
-    def _getComparable(self, termtext):
-
-        class comparable(PythonComparable):
-            def __init__(self, termText):
-                super(comparable, self).__init__()
-                self.string_part, self.int_part = termText.split('-')
-                self.int_part = int(self.int_part)
-            def compareTo(self, o):
-                return (cmp(self.string_part, o.string_part) or
-                        cmp(self.int_part, o.int_part))
-
-        return comparable(termtext)
-
-    def _getComparator(self):
-
-        class comparator(PythonSortComparator):
-
-            def getComparable(_self, termText):
-                return self._getComparable(termText)
-
-            def newComparator(_self, reader, fieldname):
-                 enumerator = reader.terms(Term(fieldname, ""))
-
-                 class comparator(PythonScoreDocComparator):
-                     def __init__(_self, cache):
-                         super(comparator, _self).__init__()
-                         _self.cache = cache
-                     def compare(_self, i, j):
-                         return _self.cache[i.doc].compareTo(_self.cache[j.doc])
-                     def sortType(_self):
-                         return SortField.CUSTOM
-                     def sortValue(_self, i):
-                         return _self.cache[i.doc]
-
-                 try:
-                     cache = self._fillCache(reader, enumerator, fieldname)
-                     return comparator(cache)
-                 finally:
-                     enumerator.close()
-
-        return comparator()
-
-    def _fillCache(self, reader, enumerator, fieldName):
-        """
-        Returns an array of objects which represent that natural order
-        of the term values in the given field.
-
-        @param reader     Terms are in this index.
-        @param enumerator Use this to get the term values and TermDocs.
-        @param fieldname  Comparables should be for this field.
-        @return Array of objects representing natural order of terms in field.
-        """
-
-        retArray = [None] * reader.maxDoc()
-        if len(retArray) > 0:
-            termDocs = reader.termDocs()
-            try:
-                if enumerator.term() is None:
-                    raise RuntimeError, "no terms in field " + fieldName
-                while True:
-                    term = enumerator.term()
-                    if term.field() != fieldName:
-                        break
-                    termval = self._getComparable(term.text())
-                    termDocs.seek(enumerator)
-                    while termDocs.next():
-                        retArray[termDocs.doc()] = termval
-                    if not enumerator.next():
-                        break
-            finally:
-                termDocs.close()
-
-        return retArray
-
-    def _runMultiSorts(self, multi):
-        """
-        runs a variety of sorts useful for multisearchers
-        """
-
-        sort = Sort()
+        for i in xrange(NUM_STRINGS):
+            doc = Document()
+            num = self.getRandomCharString(self.getRandomNumber(2, 8), 48, 52)
+            doc.add(Field("tracer", num, Field.Store.YES, Field.Index.NO))
+            # doc.add(Field("contents", str(i), Field.Store.NO,
+            #         Field.Index.ANALYZED))
+            doc.add(Field("string", num, Field.Store.NO,
+                          Field.Index.NOT_ANALYZED))
+            num2 = self.getRandomCharString(self.getRandomNumber(1, 4), 48, 50)
+            doc.add(Field("string2", num2, Field.Store.NO,
+                          Field.Index.NOT_ANALYZED))
+            doc.add(Field("tracer2", num2, Field.Store.YES, Field.Index.NO))
+            doc.setBoost(2.0)  # produce some scores above 1.0
+            writer.setMaxBufferedDocs(self.getRandomNumber(2, 12))
+            writer.addDocument(doc)
+      
+        # writer.optimize()
+        # print writer.getSegmentCount()
+        writer.close()
 
-        sort.setSort(SortField.FIELD_DOC)
-        self._assertMatchesPattern(multi, self.queryA, sort,
-                                   "[AB]{2}[CD]{2}[EF]{2}[GH]{2}[IJ]{2}")
+        return IndexSearcher(indexStore)
+  
+    def getRandomNumberString(self, num, low, high):
 
-        sort.setSort(SortField("int", SortField.INT))
-        self._assertMatchesPattern(multi, self.queryA, sort,
-                                   "IDHFGJ[ABE]{3}C")
+        return ''.join([self.getRandomNumber(low, high) for i in xrange(num)])
+  
+    def getRandomCharString(self, num):
+
+        return self.getRandomCharString(num, 48, 122)
+  
+    def getRandomCharString(self, num,  start, end):
         
-        sort.setSort([SortField("int", SortField.INT),
-                      SortField.FIELD_DOC])
-        self._assertMatchesPattern(multi, self.queryA, sort,
-                                   "IDHFGJ[AB]{2}EC")
-
-        sort.setSort("int")
-        self._assertMatchesPattern(multi, self.queryA, sort,
-                                   "IDHFGJ[AB]{2}EC")
-
-        sort.setSort([SortField("float", SortField.FLOAT),
-                      SortField.FIELD_DOC])
-        self._assertMatchesPattern(multi, self.queryA, sort,
-                                   "GDHJ[CI]{2}EFAB")
-
-        sort.setSort("float")
-        self._assertMatchesPattern(multi, self.queryA, sort,
-                                   "GDHJ[CI]{2}EFAB")
-
-        sort.setSort("string")
-        self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
-
-        sort.setSort("int", True)
-        self._assertMatchesPattern(multi, self.queryA, sort,
-                                   "C[AB]{2}EJGFHDI")
-
-        sort.setSort("float", True)
-        self._assertMatchesPattern(multi, self.queryA, sort,
-                                   "BAFE[IC]{2}JHDG")
-
-        sort.setSort("string", True)
-        self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")
-
-        sort.setSort([SortField("string", Locale.US)])
-        self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
-
-        sort.setSort([SortField("string", Locale.US, True)])
-        self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")
-
-        sort.setSort(["int", "float"])
-        self._assertMatches(multi, self.queryA, sort, "IDHFGJEABC")
-
-        sort.setSort(["float", "string"])
-        self._assertMatches(multi, self.queryA, sort, "GDHJICEFAB")
-
-        sort.setSort("int")
-        self._assertMatches(multi, self.queryF, sort, "IZJ")
-
-        sort.setSort("int", True)
-        self._assertMatches(multi, self.queryF, sort, "JZI")
-
-        sort.setSort("float")
-        self._assertMatches(multi, self.queryF, sort, "ZJI")
+        return ''.join([chr(self.getRandomNumber(start, end))
+                        for i in xrange(num)])
+  
+    def getRandomNumber(self, low, high):
+  
+        return randint(low, high)
 
-        sort.setSort("string")
-        self._assertMatches(multi, self.queryF, sort, "ZJI")
-
-        sort.setSort("string", True)
-        self._assertMatches(multi, self.queryF, sort, "IJZ")
-
-    def _getScores(self, hits):
-
-        scoreMap = {}
-
-        for hit in hits:
-            hit = Hit.cast_(hit)
-            doc = hit.getDocument()
-            v = doc.getValues("tracer")
-            self.assertEqual(len(v), 1)
-            scoreMap[v[0]] = float(hit.getScore())
-
-        return scoreMap
-
-    def _assertSameValues(self, m1, m2):
-        """
-        make sure all the values in the maps match
-        """
+    def _getXIndex(self):
+        return self._getIndex(True, False)
 
-        n = len(m1)
-        m = len(m2)
-        self.assertEqual(n, m)
+    def _getYIndex(self):
+        return self._getIndex(False, True)
 
-        for key in m1.iterkeys():
-            self.assertEqual(m1[key], m2[key])
+    def _getEmptyIndex(self):
+        return self._getIndex(False, False)
 
     def setUp(self):
 
@@ -301,17 +171,9 @@
         self.queryX = TermQuery(Term("contents", "x"))
         self.queryY = TermQuery(Term("contents", "y"))
         self.queryA = TermQuery(Term("contents", "a"))
+        self.queryE = TermQuery(Term("contents", "e"))
         self.queryF = TermQuery(Term("contents", "f"))
-
-    def tearDown(self):
-        
-        del self.full
-        del self.queryX
-        del self.queryY
-        del self.queryA
-        del self.queryF
-        del self.searchX
-        del self.searchY
+        self.queryG = TermQuery(Term("contents", "g"))
 
     def testBuiltInSorts(self):
         """
@@ -332,6 +194,7 @@
         """
 
         sort = Sort()
+
         sort.setSort([SortField("int", SortField.INT),
                       SortField.FIELD_DOC])
         self._assertMatches(self.full, self.queryX, sort, "IGAEC")
@@ -342,11 +205,156 @@
         self._assertMatches(self.full, self.queryX, sort, "GCIEA")
         self._assertMatches(self.full, self.queryY, sort, "DHJFB")
 
+        sort.setSort([SortField("long", SortField.LONG),
+                      SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryX, sort, "EACGI")
+        self._assertMatches(self.full, self.queryY, sort, "FBJHD")
+
+        sort.setSort([SortField("double", SortField.DOUBLE),
+                      SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryX, sort, "AGICE")
+        self._assertMatches(self.full, self.queryY, sort, "DJHBF")
+
+        sort.setSort([SortField("byte", SortField.BYTE),
+                      SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryX, sort, "CIGAE")
+        self._assertMatches(self.full, self.queryY, sort, "DHFBJ")
+
+        sort.setSort([SortField("short", SortField.SHORT),
+                      SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryX, sort, "IAGCE")
+        self._assertMatches(self.full, self.queryY, sort, "DFHBJ")
+
         sort.setSort([SortField("string", SortField.STRING),
                       SortField.FIELD_DOC])
         self._assertMatches(self.full, self.queryX, sort, "AIGEC")
         self._assertMatches(self.full, self.queryY, sort, "DJHFB")
+  
+    def testStringSort(self):
+        """
+        Test String sorting: small queue to many matches, multi field sort,
+        reverse sort
+        """
 
+        sort = Sort()
+        searcher = self.getFullStrings()
+
+        sort.setSort([SortField("string", SortField.STRING),
+                      SortField("string2", SortField.STRING, True),
+                      SortField.FIELD_DOC])
+
+        result = searcher.search(MatchAllDocsQuery(), None, 500, sort).scoreDocs
+
+        buff = []
+        last = None
+        lastSub = None
+        lastDocId = 0
+        fail = False
+
+        for scoreDoc in result:
+            doc2 = searcher.doc(scoreDoc.doc)
+            v = doc2.getValues("tracer")
+            v2 = doc2.getValues("tracer2")
+            for _v, _v2 in izip(v, v2):
+                if last is not None:
+                    _cmp = cmp(_v, last)
+                    if _cmp < 0: # ensure first field is in order
+                        fail = True
+                        print "fail:", _v, "<", last
+
+                    if _cmp == 0: # ensure second field is in reverse order
+                        _cmp = cmp(_v2, lastSub)
+                        if _cmp > 0:
+                            fail = True
+                            print "rev field fail:", _v2, ">", lastSub
+                        elif _cmp == 0: # ensure docid is in order
+                            if scoreDoc.doc < lastDocId:
+                                fail = True
+                                print "doc fail:", scoreDoc.doc, ">", lastDocId
+
+                last = _v
+                lastSub = _v2
+                lastDocId = scoreDoc.doc
+                buff.append(_v + "(" + _v2 + ")(" + str(scoreDoc.doc) + ") ")
+
+        if fail:
+            print "topn field1(field2)(docID):", ''.join(buff)
+
+        self.assert_(not fail, "Found sort results out of order")
+  
+    def testCustomFieldParserSort(self):
+        """
+        test sorts where the type of field is specified and a custom field
+        parser is used, that uses a simple char encoding. The sorted string
+        contains a character beginning from 'A' that is mapped to a numeric
+        value using some "funny" algorithm to be different for each data
+        type.
+        """
+
+        # since tests explicitly use different parsers on the same field name
+        # we explicitly check/purge the FieldCache between each assertMatch
+        fc = FieldCache.DEFAULT
+        
+        class intParser(PythonIntParser):
+            def parseInt(_self, val):
+                return (ord(val[0]) - ord('A')) * 123456
+
+        class floatParser(PythonFloatParser):
+            def parseFloat(_self, val):
+                return math.sqrt(ord(val[0]))
+
+        class longParser(PythonLongParser):
+            def parseLong(_self, val):
+                return (ord(val[0]) - ord('A')) * 1234567890L
+
+        class doubleParser(PythonDoubleParser):
+            def parseDouble(_self, val):
+                return math.pow(ord(val[0]), ord(val[0]) - ord('A'))
+
+        class byteParser(PythonByteParser):
+            def parseByte(_self, val):
+                return chr(ord(val[0]) - ord('A'))
+
+        class shortParser(PythonShortParser):
+            def parseShort(_self, val):
+                return ord(val[0]) - ord('A')
+
+        sort = Sort()
+        sort.setSort([SortField("parser", intParser()),
+                      SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
+        self._assertSaneFieldCaches(self.getName() + " IntParser")
+        fc.purgeAllCaches()
+
+        sort.setSort([SortField("parser", floatParser()),
+                      SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
+        self._assertSaneFieldCaches(self.getName() + " FloatParser")
+        fc.purgeAllCaches()
+
+        sort.setSort([SortField("parser", longParser()),
+                           SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
+        self._assertSaneFieldCaches(self.getName() + " LongParser")
+        fc.purgeAllCaches()
+
+        sort.setSort([SortField("parser", doubleParser()),
+                      SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
+        self._assertSaneFieldCaches(self.getName() + " DoubleParser")
+        fc.purgeAllCaches()
+
+        sort.setSort([SortField("parser", byteParser()),
+                      SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
+        self._assertSaneFieldCaches(self.getName() + " ByteParser")
+        fc.purgeAllCaches()
+
+        sort.setSort([SortField("parser", shortParser()),
+                      SortField.FIELD_DOC])
+        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
+        self._assertSaneFieldCaches(self.getName() + " ShortParser")
+        fc.purgeAllCaches()
 
     def testEmptyIndex(self):
         """
@@ -361,8 +369,7 @@
         sort.setSort(SortField.FIELD_DOC)
         self._assertMatches(empty, self.queryX, sort, "")
 
-        sort.setSort([SortField("int", SortField.INT),
-                      SortField.FIELD_DOC])
+        sort.setSort([SortField("int", SortField.INT), SortField.FIELD_DOC])
         self._assertMatches(empty, self.queryX, sort, "")
 
         sort.setSort([SortField("string", SortField.STRING, True),
@@ -374,11 +381,19 @@
         self._assertMatches(empty, self.queryX, sort, "")
 
 
+    def testNewCustomFieldParserSort(self):
+        """
+        Test sorting w/ custom FieldComparator
+        """
+        sort = Sort()
+
+        sort.setSort([SortField("parser", MyFieldComparatorSource())])
+        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
+
     def testAutoSort(self):
         """
         test sorts where the type of field is determined dynamically
         """
-
         sort = Sort()
 
         sort.setSort("int")
@@ -397,7 +412,6 @@
         """
         test sorts in reverse
         """
-
         sort = Sort()
 
         sort.setSort([SortField(None, SortField.SCORE, True),
@@ -423,17 +437,22 @@
 
     def testEmptyFieldSort(self):
         """
-        test sorting when the sort field is empty (undefined)
-        for some of the documents
+        test sorting when the sort field is empty (undefined) for some of the
+        documents
         """
-
         sort = Sort()
-        
+
         sort.setSort("string")
         self._assertMatches(self.full, self.queryF, sort, "ZJI")
 
         sort.setSort("string", True)
         self._assertMatches(self.full, self.queryF, sort, "IJZ")
+    
+        sort.setSort(SortField("i18n", Locale.ENGLISH))
+        self._assertMatches(self.full, self.queryF, sort, "ZJI")
+    
+        sort.setSort(SortField("i18n", Locale.ENGLISH, True))
+        self._assertMatches(self.full, self.queryF, sort, "IJZ")
 
         sort.setSort("int")
         self._assertMatches(self.full, self.queryF, sort, "IZJ")
@@ -444,16 +463,66 @@
         sort.setSort("float")
         self._assertMatches(self.full, self.queryF, sort, "ZJI")
 
+        # using a nonexisting field as first sort key shouldn't make a
+        # difference:
+        sort.setSort([SortField("nosuchfield", SortField.STRING),
+                      SortField("float")])
+        self._assertMatches(self.full, self.queryF, sort, "ZJI")
+
         sort.setSort("float", True)
         self._assertMatches(self.full, self.queryF, sort, "IJZ")
-        
+
+        # When a field is None for both documents, the next SortField should
+        # be used. 
+        # This works for the plain searcher (self.full):
+        sort.setSort([SortField("int"),
+                      SortField("string", SortField.STRING),
+                      SortField("float")])
+        self._assertMatches(self.full, self.queryG, sort, "ZWXY")
+
+        # Reverse the last criterion to make sure the test didn't pass by
+        # chance 
+        sort.setSort([SortField("int"),
+                      SortField("string", SortField.STRING),
+                      SortField("float", True)])
+        self._assertMatches(self.full, self.queryG, sort, "ZYXW")
+
+        # Do the same for a MultiSearcher
+        multiSearcher = MultiSearcher([self.full])
+
+        sort.setSort([SortField("int"),
+                      SortField("string", SortField.STRING),
+                      SortField("float")])
+        self._assertMatches(multiSearcher, self.queryG, sort, "ZWXY")
+
+        sort.setSort([SortField("int"),
+                      SortField("string", SortField.STRING),
+                      SortField("float", True)])
+        self._assertMatches(multiSearcher, self.queryG, sort, "ZYXW")
+
+        # Don't close the multiSearcher; it would close the full searcher too!
+        # Do the same for a ParallelMultiSearcher
+        parallelSearcher = ParallelMultiSearcher([self.full])
+
+        sort.setSort([SortField("int"),
+                      SortField("string", SortField.STRING),
+                      SortField("float")])
+        self._assertMatches(parallelSearcher, self.queryG, sort, "ZWXY")
+
+        sort.setSort([SortField("int"),
+                      SortField("string", SortField.STRING),
+                      SortField("float", True)])
+        self._assertMatches(parallelSearcher, self.queryG, sort, "ZYXW")
+
+        # Don't close the parallelSearcher; it would close the full searcher
+        # too!
+
     def testSortCombos(self):
         """
         test sorts using a series of fields
         """
-
         sort = Sort()
-        
+
         sort.setSort(["int", "float"])
         self._assertMatches(self.full, self.queryX, sort, "IGEAC")
 
@@ -468,9 +537,8 @@
         """
         test using a Locale for sorting strings
         """
-
         sort = Sort()
-        
+
         sort.setSort([SortField("string", Locale.US)])
         self._assertMatches(self.full, self.queryX, sort, "AIGEC")
         self._assertMatches(self.full, self.queryY, sort, "DJHFB")
@@ -479,23 +547,62 @@
         self._assertMatches(self.full, self.queryX, sort, "CEGIA")
         self._assertMatches(self.full, self.queryY, sort, "BFHJD")
 
+    def testInternationalSort(self):
+        """
+        test using various international locales with accented characters
+        (which sort differently depending on locale)
+        """
+        # each expected string is the concatenation of the "tracer" values
+        # of the hits, in the order _assertMatches reads them back
+        sort = Sort()
+
+        sort.setSort(SortField("i18n", Locale.US))
+        self._assertMatches(self.full, self.queryY, sort, "BFJDH")
+
+        sort.setSort(SortField("i18n", Locale("sv", "se")))
+        self._assertMatches(self.full, self.queryY, sort, "BJDFH")
+
+        sort.setSort(SortField("i18n", Locale("da", "dk")))
+        self._assertMatches(self.full, self.queryY, sort, "BJDHF")
+
+        sort.setSort(SortField("i18n", Locale.US))
+        self._assertMatches(self.full, self.queryX, sort, "ECAGI")
+
+        sort.setSort(SortField("i18n", Locale.FRANCE))
+        self._assertMatches(self.full, self.queryX, sort, "EACGI")
+
+    def testInternationalMultiSearcherSort(self):
+        """
+        Test the MultiSearcher's ability to preserve locale-sensitive ordering
+        by wrapping it around a single searcher
+        """
+        sort = Sort()
+
+        # the expected orders below are the same ones testInternationalSort
+        # checks for queryY when searching self.full directly
+        multiSearcher = MultiSearcher([self.full])
+        sort.setSort(SortField("i18n", Locale("sv", "se")))
+        self._assertMatches(multiSearcher, self.queryY, sort, "BJDFH")
+    
+        sort.setSort(SortField("i18n", Locale.US))
+        self._assertMatches(multiSearcher, self.queryY, sort, "BFJDH")
+    
+        sort.setSort(SortField("i18n", Locale("da", "dk")))
+        self._assertMatches(multiSearcher, self.queryY, sort, "BJDHF")
+    
     def testCustomSorts(self):
         """
         test a custom sort function
         """
-
         sort = Sort()
-        
-        sort.setSort(SortField("custom", self._getComparatorSource()))
+
+        sort.setSort(SortField("custom",
+                               SampleComparable.getComparatorSource()))
         self._assertMatches(self.full, self.queryX, sort, "CAIEG")
 
-        sort.setSort(SortField("custom", self._getComparatorSource(), True))
+        sort.setSort(SortField("custom",
+                               SampleComparable.getComparatorSource(), True))
         self._assertMatches(self.full, self.queryY, sort, "HJDBF")
 
-        custom = self._getComparator()
+        custom = SampleComparable.getComparator()
         sort.setSort(SortField("custom", custom))
         self._assertMatches(self.full, self.queryX, sort, "CAIEG")
-
         sort.setSort(SortField("custom", custom, True))
         self._assertMatches(self.full, self.queryY, sort, "HJDBF")
 
@@ -503,9 +610,9 @@
         """
         test a variety of sorts using more than one searcher
         """
-         
+        
         searcher = MultiSearcher([self.searchX, self.searchY])
-        self._runMultiSorts(searcher)
+        self.runMultiSorts(searcher, False)
 
     def testParallelMultiSort(self):
         """
@@ -513,7 +620,7 @@
         """
 
         searcher = ParallelMultiSearcher([self.searchX, self.searchY])
-        self._runMultiSorts(searcher)
+        self.runMultiSorts(searcher, False)
 
     def testNormalizedScores(self):
         """
@@ -521,94 +628,542 @@
         hits are sorted
         """
 
-        full = self.full
-
         # capture relevancy scores
-        scoresX = self._getScores(full.search(self.queryX))
-        scoresY = self._getScores(full.search(self.queryY))
-        scoresA = self._getScores(full.search(self.queryA))
-
-        # we'll test searching locally and multi
-        # note: the multi test depends on each separate index containing
-        # the same documents as our local index, so the computed normalization
-        # will be the same.  so we make a multi searcher over two equal document
-        # sets - not realistic, but necessary for testing.
-
-        queryX = self.queryX
-        queryY = self.queryY
-        queryA = self.queryA
+        scoresX = self.getScores(self.full.search(self.queryX, None,
+                                                  1000).scoreDocs, self.full)
+        scoresY = self.getScores(self.full.search(self.queryY, None,
+                                                  1000).scoreDocs, self.full)
+        scoresA = self.getScores(self.full.search(self.queryA, None,
+                                                  1000).scoreDocs, self.full)
 
+        # we'll test searching locally and multi
         multi = MultiSearcher([self.searchX, self.searchY])
 
-        gs = self._getScores
-
         # change sorting and make sure relevancy stays the same
 
         sort = Sort()
-
-        self._assertSameValues(scoresX, gs(full.search(queryX, sort)))
-        self._assertSameValues(scoresX, gs(multi.search(queryX, sort)))
-        self._assertSameValues(scoresY, gs(full.search(queryY, sort)))
-        self._assertSameValues(scoresY, gs(multi.search(queryY, sort)))
-        self._assertSameValues(scoresA, gs(full.search(queryA, sort)))
-        self._assertSameValues(scoresA, gs(multi.search(queryA, sort)))
+        self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
 
         sort.setSort(SortField.FIELD_DOC)
-        self._assertSameValues(scoresX, gs(full.search(queryX,sort)))
-        self._assertSameValues(scoresX, gs(multi.search(queryX, sort)))
-        self._assertSameValues(scoresY, gs(full.search(queryY, sort)))
-        self._assertSameValues(scoresY, gs(multi.search(queryY, sort)))
-        self._assertSameValues(scoresA, gs(full.search(queryA, sort)))
-        self._assertSameValues(scoresA, gs(multi.search(queryA, sort)))
+        self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
 
         sort.setSort("int")
-        self._assertSameValues(scoresX, gs(full.search(queryX, sort)))
-        self._assertSameValues(scoresX, gs(multi.search(queryX, sort)))
-        self._assertSameValues(scoresY, gs(full.search(queryY, sort)))
-        self._assertSameValues(scoresY, gs(multi.search(queryY, sort)))
-        self._assertSameValues(scoresA, gs(full.search(queryA, sort)))
-        self._assertSameValues(scoresA, gs(multi.search(queryA, sort)))
+        self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
 
         sort.setSort("float")
-        self._assertSameValues(scoresX, gs(full.search(queryX, sort)))
-        self._assertSameValues(scoresX, gs(multi.search(queryX, sort)))
-        self._assertSameValues(scoresY, gs(full.search(queryY, sort)))
-        self._assertSameValues(scoresY, gs(multi.search(queryY, sort)))
-        self._assertSameValues(scoresA, gs(full.search(queryA, sort)))
-        self._assertSameValues(scoresA, gs(multi.search(queryA, sort)))
+        self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
 
         sort.setSort("string")
-        self._assertSameValues(scoresX, gs(full.search(queryX, sort)))
-        self._assertSameValues(scoresX, gs(multi.search(queryX, sort)))
-        self._assertSameValues(scoresY, gs(full.search(queryY, sort)))
-        self._assertSameValues(scoresY, gs(multi.search(queryY, sort)))
-        self._assertSameValues(scoresA, gs(full.search(queryA, sort)))
-        self._assertSameValues(scoresA, gs(multi.search(queryA, sort)))
+        self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
 
         sort.setSort(["int", "float"])
-        self._assertSameValues(scoresX, gs(full.search(queryX, sort)))
-        self._assertSameValues(scoresX, gs(multi.search(queryX, sort)))
-        self._assertSameValues(scoresY, gs(full.search(queryY, sort)))
-        self._assertSameValues(scoresY, gs(multi.search(queryY, sort)))
-        self._assertSameValues(scoresA, gs(full.search(queryA, sort)))
-        self._assertSameValues(scoresA, gs(multi.search(queryA, sort)))
+        self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
 
         sort.setSort([SortField("int", True),
                       SortField(None, SortField.DOC, True)])
-        self._assertSameValues(scoresX, gs(full.search(queryX, sort)))
-        self._assertSameValues(scoresX, gs(multi.search(queryX, sort)))
-        self._assertSameValues(scoresY, gs(full.search(queryY, sort)))
-        self._assertSameValues(scoresY, gs(multi.search(queryY, sort)))
-        self._assertSameValues(scoresA, gs(full.search(queryA, sort)))
-        self._assertSameValues(scoresA, gs(multi.search(queryA, sort)))
+        self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
+
+        sort.setSort(["float", "string"])
+        self._assertSameValues(scoresX, self.getScores(self.full.search(self.queryX, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresX, self.getScores(multi.search(self.queryX, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresY, self.getScores(self.full.search(self.queryY, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresY, self.getScores(multi.search(self.queryY, None, 1000, sort).scoreDocs, multi))
+        self._assertSameValues(scoresA, self.getScores(self.full.search(self.queryA, None, 1000, sort).scoreDocs, self.full))
+        self._assertSameValues(scoresA, self.getScores(multi.search(self.queryA, None, 1000, sort).scoreDocs, multi))
+
+    def testTopDocsScores(self):
+        """
+        There was previously a bug in FieldSortedHitQueue.maxscore when only
+        a single doc was added.  That is what the following tests for.
+        """
+        
+        sort = Sort()
+        nDocs = 10
+
+        # try to pick a query that will result in an unnormalized
+        # score greater than 1 to test for correct normalization
+        docs1 = self.full.search(self.queryE, None, nDocs, sort)
+
+        # a filter that only allows through the first hit
+        # NOTE(review): set(0, maxDoc) appears to set every bit before the
+        # first hit's bit is set, so nothing is filtered out - confirm intent
+        class filter(PythonFilter):
+            def bits(_self, reader):
+                bs = BitSet(reader.maxDoc())
+                bs.set(0, reader.maxDoc())
+                bs.set(docs1.scoreDocs[0].doc)
+                return bs
+
+        filt = filter()
+
+        docs2 = self.full.search(self.queryE, filt, nDocs, sort)
+
+        # The top scores must agree to within 1e-6.  TestCase.assertEqual
+        # takes a failure message, not a delta, as its third argument, so
+        # the tolerance has to be checked explicitly.
+        score1 = docs1.scoreDocs[0].score
+        score2 = docs2.scoreDocs[0].score
+        self.assert_(abs(score1 - score2) <= 1e-6,
+                     "top scores differ: %r != %r" % (score1, score2))
+  
+    def testSortWithoutFillFields(self):
+        """
+        There was previously a bug in TopFieldCollector when fillFields was
+        set to False - the same doc and score was set in ScoreDoc[]
+        array. This test asserts that if fillFields is False, the documents
+        are set properly. It does not use Searcher's default search
+        methods(with Sort) since all set fillFields to True.
+        """
+
+        # Two Sort criteria to instantiate the multi/single comparators.
+        sorts = [Sort(SortField.FIELD_DOC), Sort()]
+        for sort in sorts:
+            q = MatchAllDocsQuery()
+            # the first boolean passed to create() is fillFields
+            tdc = TopFieldCollector.create(sort, 10, False,
+                                           False, False, True)
+            self.full.search(q, tdc)
+      
+            sds = tdc.topDocs().scoreDocs
+            # neighbouring hits must not refer to the same document
+            for i in xrange(1, len(sds)):
+                self.assert_(sds[i].doc != sds[i - 1].doc)
+
+    def testSortWithoutScoreTracking(self):
+        """
+        Collect with document-score and max-score tracking switched off and
+        check that every hit's score, and the max score, is NaN.
+        """
+
+        # Two Sort criteria to instantiate the multi/single comparators.
+        sorts = [Sort(SortField.FIELD_DOC), Sort()]
+        for sort in sorts:
+            q = MatchAllDocsQuery()
+            # booleans: fillFields, trackDocScores, trackMaxScore,
+            # docsScoredInOrder
+            tdc = TopFieldCollector.create(sort, 10, True, False,
+                                           False, True)
+      
+            self.full.search(q, tdc)
+      
+            tds = tdc.topDocs()
+            sds = tds.scoreDocs
+            for sd in sds:
+                self.assert_(Float.isNaN_(sd.score))
+
+            self.assert_(Float.isNaN_(tds.getMaxScore()))
+
+    def testSortWithScoreNoMaxScoreTracking(self):
+        """
+        Collect with document scores tracked but max-score tracking off and
+        check that every hit has a real score while the max score is NaN.
+        """
+        
+        # Two Sort criteria to instantiate the multi/single comparators.
+        sorts = [Sort(SortField.FIELD_DOC), Sort()]
+        for sort in sorts:
+            q = MatchAllDocsQuery()
+            # booleans: fillFields, trackDocScores, trackMaxScore,
+            # docsScoredInOrder
+            tdc = TopFieldCollector.create(sort, 10, True, True,
+                                           False, True)
+      
+            self.full.search(q, tdc)
+      
+            tds = tdc.topDocs()
+            sds = tds.scoreDocs
+            for sd in sds:
+                self.assert_(not Float.isNaN_(sd.score))
+
+            self.assert_(Float.isNaN_(tds.getMaxScore()))
+  
+    def testSortWithScoreAndMaxScoreTracking(self):
+        """
+        Collect with both document scores and the max score tracked and
+        check that neither any hit's score nor the max score is NaN.
+        """
+        
+        # Two Sort criteria to instantiate the multi/single comparators.
+        sorts = [Sort(SortField.FIELD_DOC), Sort()]
+        for sort in sorts:
+            q = MatchAllDocsQuery()
+            # booleans: fillFields, trackDocScores, trackMaxScore,
+            # docsScoredInOrder
+            tdc = TopFieldCollector.create(sort, 10, True, True,
+                                           True, True)
+      
+            self.full.search(q, tdc)
+      
+            tds = tdc.topDocs()
+            sds = tds.scoreDocs
+            for sd in sds:
+                self.assert_(not Float.isNaN_(sd.score))
+
+            self.assert_(not Float.isNaN_(tds.getMaxScore()))
+
+    def testOutOfOrderDocsScoringSort(self):
+        """
+        For every combination of the three tracking options, create a
+        TopFieldCollector with in-order scoring not requested (last
+        argument False), check the name of the collector class that was
+        returned, then run a BooleanQuery through it and expect 10 hits.
+        """
+
+        # Two Sort criteria to instantiate the multi/single comparators.
+        sorts = [Sort(SortField.FIELD_DOC), Sort()]
+
+        # each row supplies the 3rd, 4th and 5th arguments of create():
+        # fillFields, trackDocScores, trackMaxScore
+        tfcOptions = [[False, False, False],
+                      [False, False, True],
+                      [False, True, False],
+                      [False, True, True],
+                      [True, False, False],
+                      [True, False, True],
+                      [True, True, False],
+                      [True, True, True]]
+
+        # expected collector class (the part of the class name after "$")
+        # for the tfcOptions row at the same index
+        actualTFCClasses = [
+            "OutOfOrderOneComparatorNonScoringCollector", 
+            "OutOfOrderOneComparatorScoringMaxScoreCollector", 
+            "OutOfOrderOneComparatorScoringNoMaxScoreCollector", 
+            "OutOfOrderOneComparatorScoringMaxScoreCollector", 
+            "OutOfOrderOneComparatorNonScoringCollector", 
+            "OutOfOrderOneComparatorScoringMaxScoreCollector", 
+            "OutOfOrderOneComparatorScoringNoMaxScoreCollector", 
+            "OutOfOrderOneComparatorScoringMaxScoreCollector" 
+        ]
+    
+        # Save the original value to set later.
+        origVal = BooleanQuery.getAllowDocsOutOfOrder()
+
+        BooleanQuery.setAllowDocsOutOfOrder(True)
+        bq = BooleanQuery()
+
+        # Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2
+        # which delegates to BS if there are no mandatory clauses.
+        bq.add(MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
+
+        # Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to
+        # return the clause instead of BQ.
+        bq.setMinimumNumberShouldMatch(1)
+
+        try:
+            for sort in sorts:
+                for tfcOption, actualTFCClass in izip(tfcOptions,
+                                                      actualTFCClasses):
+                    tdc = TopFieldCollector.create(sort, 10, tfcOption[0],
+                                                   tfcOption[1], tfcOption[2],
+                                                   False)
+
+                    self.assert_(tdc.getClass().getName().endswith("$" + actualTFCClass))
+          
+                    self.full.search(bq, tdc)
+          
+                    tds = tdc.topDocs()
+                    sds = tds.scoreDocs  
+                    self.assertEqual(10, len(sds))
+        finally:
+            # Whatever happens, reset BooleanQuery.allowDocsOutOfOrder to the
+            # original value. Don't set it to False in case the
+            # implementation in BQ will change some day.
+            BooleanQuery.setAllowDocsOutOfOrder(origVal)
+  
+    def testSortWithScoreAndMaxScoreTrackingNoResults(self):
+        """
+        Ask a freshly created collector, which was never passed to a
+        search, for its top docs: totalHits must be 0 and the max score NaN.
+        """
+
+        # Two Sort criteria to instantiate the multi/single comparators.
+        sorts = [Sort(SortField.FIELD_DOC), Sort()]
+        for sort in sorts:
+            tdc = TopFieldCollector.create(sort, 10, True, True, True, True)
+            tds = tdc.topDocs()
+            self.assertEqual(0, tds.totalHits)
+            self.assert_(Float.isNaN_(tds.getMaxScore()))
+  
+    def runMultiSorts(self, multi, isFull):
+        """
+        runs a variety of sorts useful for multisearchers
+        """
+        sort = Sort()
+
+        sort.setSort(SortField.FIELD_DOC)
+        expected = isFull and "ABCDEFGHIJ" or "ACEGIBDFHJ"
+        self._assertMatches(multi, self.queryA, sort, expected)
+
+        sort.setSort(SortField("int", SortField.INT))
+        expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
+        self._assertMatches(multi, self.queryA, sort, expected)
+
+        sort.setSort([SortField("int", SortField.INT), SortField.FIELD_DOC])
+        expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
+        self._assertMatches(multi, self.queryA, sort, expected)
+
+        sort.setSort("int")
+        expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
+        self._assertMatches(multi, self.queryA, sort, expected)
+
+        sort.setSort([SortField("float", SortField.FLOAT), SortField.FIELD_DOC])
+        self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")
+
+        sort.setSort("float")
+        self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")
+
+        sort.setSort("string")
+        self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
+
+        sort.setSort("int", True)
+        expected = isFull and "CABEJGFHDI" or "CAEBJGFHDI"
+        self._assertMatches(multi, self.queryA, sort, expected)
+
+        sort.setSort("float", True)
+        self._assertMatches(multi, self.queryA, sort, "BAFECIJHDG")
+
+        sort.setSort("string", True)
+        self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")
+
+        sort.setSort(["int", "float"])
+        self._assertMatches(multi, self.queryA, sort, "IDHFGJEABC")
 
         sort.setSort(["float", "string"])
-        self._assertSameValues(scoresX, gs(full.search(queryX, sort)))
-        self._assertSameValues(scoresX, gs(multi.search(queryX, sort)))
-        self._assertSameValues(scoresY, gs(full.search(queryY, sort)))
-        self._assertSameValues(scoresY, gs(multi.search(queryY, sort)))
-        self._assertSameValues(scoresA, gs(full.search(queryA, sort)))
-        self._assertSameValues(scoresA, gs(multi.search(queryA, sort)))
+        self._assertMatches(multi, self.queryA, sort, "GDHJICEFAB")
+
+        sort.setSort("int")
+        self._assertMatches(multi, self.queryF, sort, "IZJ")
+
+        sort.setSort("int", True)
+        self._assertMatches(multi, self.queryF, sort, "JZI")
+
+        sort.setSort("float")
+        self._assertMatches(multi, self.queryF, sort, "ZJI")
+
+        sort.setSort("string")
+        self._assertMatches(multi, self.queryF, sort, "ZJI")
+
+        sort.setSort("string", True)
+        self._assertMatches(multi, self.queryF, sort, "IJZ")
+
+        # up to this point, all of the searches should have "sane" 
+        # FieldCache behavior, and should have reused the cache in several
+        # cases 
+        self._assertSaneFieldCaches(self.getName() + " various")
+        
+        # next we'll check Locale based(String[]) for 'string', so purge first
+        FieldCache.DEFAULT.purgeAllCaches()
+
+        sort.setSort([SortField("string", Locale.US)])
+        self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
+
+        sort.setSort([SortField("string", Locale.US, True)])
+        self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")
+
+        sort.setSort([SortField("string", Locale.UK)])
+        self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
+
+        self._assertSaneFieldCaches(self.getName() + " Locale.US + Locale.UK")
+        FieldCache.DEFAULT.purgeAllCaches()
+
+    def _assertMatches(self, searcher, query, sort, expectedResult):
+        """
+        make sure the documents returned by the search match the expected
+        list
+
+        expectedResult is a string; it is compared with the concatenated
+        "tracer" values of the hits, taken in the order the search returned
+        them, and its length is compared with the reported totalHits.
+        """
+
+        # ScoreDoc[] result = searcher.search(query, None, 1000, sort).scoreDocs
+        # only as many hits as are expected get requested here
+        hits = searcher.search(query, None, len(expectedResult), sort)
+        sds = hits.scoreDocs
+
+        self.assertEqual(hits.totalHits, len(expectedResult))
+        buff = []
+        for sd in sds:
+            doc = searcher.doc(sd.doc)
+            v = doc.getValues("tracer")
+            for _v in v:
+                buff.append(_v)
+
+        self.assertEqual(expectedResult, ''.join(buff))
+
+    def getScores(self, hits, searcher):
+        """
+        return a dict mapping each hit's "tracer" value to that hit's score
+
+        hits is a sequence of score docs (objects with doc and score
+        attributes); searcher is used to load the document for each hit.
+        Every document is asserted to carry exactly one "tracer" value.
+        """
+
+        scoreMap = {}
+        for hit in hits:
+            doc = searcher.doc(hit.doc)
+            v = doc.getValues("tracer")
+            self.assertEqual(len(v), 1)
+            scoreMap[v[0]] = hit.score
+
+        return scoreMap
+
+    def _assertSameValues(self, m1, m2):
+        """
+        make sure all the values in the maps match to within 1e-6
+
+        m1 and m2 map keys to numeric values (such as the dicts returned
+        by getScores); they must have the same size and m2 must hold every
+        key of m1.
+        """
+
+        self.assertEquals(len(m1), len(m2))
+        for key in m1:
+            # assertEquals' third argument is the failure message, not a
+            # delta, so the tolerance is checked explicitly
+            self.assert_(abs(m1[key] - m2[key]) <= 1e-6,
+                         "values for %r differ: %r != %r"
+                         % (key, m1[key], m2[key]))
+
+    def getName(self):
+        """
+        return the name of this test case's class
+        """
+
+        return type(self).__name__
+
+    def _assertSaneFieldCaches(self, msg):
+        """
+        fail if FieldCacheSanityChecker reports any problem for the entries
+        currently held by FieldCache.DEFAULT; msg prefixes the failure text
+        """
+
+        entries = FieldCache.DEFAULT.getCacheEntries()
+
+        insanity = FieldCacheSanityChecker.checkSanity(entries)
+        self.assertEqual(0, len(insanity),
+                         msg + ": Insane FieldCache usage(s) found")
+
+
+class MyFieldComparator(PythonFieldComparator):
+    """
+    Field comparator ordering hits by an int computed from the "parser"
+    field: (ord(first character) - ord('A')) * 123456.
+    """
+
+    def __init__(self, numHits):
+        super(MyFieldComparator, self).__init__()
+        # one value per slot, numHits slots in total
+        self.slotValues = [0] * numHits
+
+    def copy(self, slot, doc):
+        # remember doc's value under the given slot
+        self.slotValues[slot] = self.docValues[doc]
+
+    def compare(self, slot1, slot2):
+        # negative, zero or positive, like a cmp() of the two slot values
+        return self.slotValues[slot1] - self.slotValues[slot2]
+
+    def compareBottom(self, doc):
+        # compares the value saved by setBottom() with doc's value
+        return self.bottomValue - self.docValues[doc]
+
+    def setBottom(self, bottom):
+        self.bottomValue = self.slotValues[bottom]
+
+    def setNextReader(self, reader, docBase):
+        
+        # the values for the new reader are loaded through the FieldCache,
+        # using a parser that looks only at the first character of a term
+        class intParser(PythonIntParser):
+            def parseInt(_self, val):
+                return (ord(val[0]) - ord('A')) * 123456
+                
+        self.docValues = FieldCache.DEFAULT.getInts(reader, "parser",
+                                                    intParser())
+
+    def value(self, slot):
+        # the slot's value, boxed as an Integer
+        return Integer(self.slotValues[slot])
+
+
+class MyFieldComparatorSource(PythonFieldComparatorSource):
+    """
+    Comparator source handing out MyFieldComparator instances.
+    """
+
+    def newComparator(self, fieldname, numHits, sortPos, reversed):
+        # only numHits is used; the other arguments are ignored
+        return MyFieldComparator(numHits)
+
+
+class SampleComparable(PythonComparable):
+    """
+    Comparable built from a "<string>-<int>" term, ordered by the string
+    part first and by the integer part second.  The class also provides
+    the comparator factories used by testCustomSorts.
+    """
+
+    def __init__(self, s):
+        super(SampleComparable, self).__init__()
+
+        # split at the first '-' only; the remainder must parse as an int
+        self.string_part, self.int_part = s.split('-', 1)
+        self.int_part = int(self.int_part)
+
+    def compareTo(self, o):
+        """
+        Compare with o by string part, then by int part when those are
+        equal; returns a cmp()-style negative, zero or positive int.
+        """
+
+        # The not-so-obvious code below first checks that o needs to be
+        # downcast into a SampleComparable, does it if needed, and sets it
+        # to the inner python instance to work, getting it through '.self'.
+        # If the compareTo() call came from Java then o may just be
+        # wrapped with java.lang.Object, hence not an instance of
+        # SampleComparable even though the actual Java object is.
+        # In that case isinstance() returns False but cast_() succeeds.
+        if not isinstance(o, SampleComparable):
+            o = SampleComparable.cast_(o).self
+
+        i = cmp(self.string_part, o.string_part)
+        if i == 0:
+            return cmp(self.int_part, o.int_part)
+
+        return i
+
+    def getComparatorSource(cls):
+        """
+        Return a new sort comparator source whose comparators order
+        documents by the SampleComparable built from each document's term
+        in the sorted field.
+        """
+
+        class sortComparatorSource(PythonSortComparatorSource):
+
+            def newComparator(_self, reader, fieldName):
+                enumerator = reader.terms(Term(fieldName, ""))
+                try:
+                    class scoreDocComparator(PythonScoreDocComparator):
+                        def __init__(self_):
+                            super(scoreDocComparator, self_).__init__()
+                            self_.cachedValues = _self.fillCache(reader, enumerator, fieldName)
+
+                        def compare(self_, i, j):
+                            return self_.cachedValues[i.doc].compareTo(self_.cachedValues[j.doc])
+
+                        def sortValue(self_, i):
+                            return self_.cachedValues[i.doc]
+
+                        def sortType(self_):
+                            return SortField.CUSTOM
+
+                    # the cache is filled in __init__, i.e. before the
+                    # enumerator gets closed by the finally clause
+                    return scoreDocComparator()
+                finally:
+                    enumerator.close()
+
+            def fillCache(_self, reader, enumerator, fieldName):
+                """
+                Returns an array of objects which represent that natural order
+                of the term values in the given field.
+
+                param reader     Terms are in this index.
+                param enumerator Use this to get the term values and TermDocs.
+                param fieldName  Comparables should be for this field.
+                return Array of objects representing natural order of terms
+                       in field.
+                throws IOException If an error occurs reading the index.
+                """
+                # one entry per document id; documents without a term in
+                # the field keep None
+                retArray = [None] * reader.maxDoc()
+                if len(retArray) > 0:
+                    termDocs = reader.termDocs()
+                    try:
+                        if enumerator.term() is None:
+                            raise AssertionError, "no terms in field " + fieldName
+                        while True:
+                            term = enumerator.term()
+                            # stop once the enumeration leaves the field
+                            if term.field() != fieldName:
+                                break
+                            termval = _self.getComparable(term.text())
+                            termDocs.seek(enumerator)
+                            while termDocs.next():
+                                retArray[termDocs.doc()] = termval
+                            if not enumerator.next():
+                                break
+                    finally:
+                        termDocs.close()
+                return retArray
+
+            def getComparable(_self, termtext):
+                return SampleComparable(termtext)
+
+        return sortComparatorSource()
+
+    def getComparator(cls):
+        """
+        Return a new sort comparator that turns term text into
+        SampleComparable instances; its hashCode() and equals() are based
+        on its Java class.
+        """
+
+        class sortComparator(PythonSortComparator):
+            def getComparable(_self, termtext):
+                return SampleComparable(termtext)
+
+            def hashCode(_self):
+                return _self.getClass().getName().hashCode()
+
+            def equals(_self, that):
+                return _self.getClass().equals(that.getClass())
+  
+        return sortComparator()
+
+    # both factories are exposed as class methods
+    getComparatorSource = classmethod(getComparatorSource)
+    getComparator = classmethod(getComparator)
 
 
 if __name__ == "__main__":