You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2010/02/18 02:46:52 UTC

svn commit: r911245 - in /lucene/solr/trunk: CHANGES.txt src/java/org/apache/solr/search/MissingStringLastComparatorSource.java src/test/org/apache/solr/search/TestSort.java

Author: yonik
Date: Thu Feb 18 01:46:51 2010
New Revision: 911245

URL: http://svn.apache.org/viewvc?rev=911245&view=rev
Log:
SOLR-1777: fix sortMissingLast, sortMissingFirst

Added:
    lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java   (with props)
Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=911245&r1=911244&r2=911245&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Thu Feb 18 01:46:51 2010
@@ -185,6 +185,10 @@
 * SOLR-1579: Fixes to XML escaping in stats.jsp
   (David Bowen and hossman)
 
+* SOLR-1777: fieldTypes with sortMissingLast=true or sortMissingFirst=true can
+  result in incorrectly sorted results.  (yonik)
+
+
 Other Changes
 ----------------------
 

Modified: lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java?rev=911245&r1=911244&r2=911245&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java Thu Feb 18 01:46:51 2010
@@ -43,18 +43,17 @@
   }
 
   public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
-    return new MissingLastOrdComparator(numHits, fieldname, sortPos, reversed, true, missingValueProxy);
+    return new MissingLastOrdComparator(numHits, fieldname, sortPos, reversed, missingValueProxy);
   }
 
 }
 
+
 // Copied from Lucene and modified since the Lucene version couldn't
 // be extended or have it's values accessed.
-
-// NOTE: there were a number of other interesting String
-// comparators explored, but this one seemed to perform
-// best all around.  See LUCENE-1483 for details.
-class MissingLastOrdComparator extends FieldComparator {
+ class MissingLastOrdComparator extends FieldComparator {
+    private static final int NULL_ORD = Integer.MAX_VALUE;
+    private final String nullVal; 
 
     private final int[] ords;
     private final String[] values;
@@ -71,30 +70,19 @@
     private final boolean reversed;
     private final int sortPos;
 
-    private final int nullCmp;
-    private final Comparable nullVal;
-
-    public MissingLastOrdComparator(int numHits, String field, int sortPos, boolean reversed, boolean sortMissingLast, Comparable nullVal) {
+   public MissingLastOrdComparator(int numHits, String field, int sortPos, boolean reversed, String nullVal) {
       ords = new int[numHits];
       values = new String[numHits];
       readerGen = new int[numHits];
       this.sortPos = sortPos;
       this.reversed = reversed;
       this.field = field;
-      this.nullCmp = sortMissingLast ? 1 : -1;
       this.nullVal = nullVal;
     }
 
-  public int compare(int slot1, int slot2) {
-      int ord1 = ords[slot1];
-      int ord2 = ords[slot2];
-      int cmp = ord1-ord2;
-      if (ord1==0 || ord2==0) {
-        if (cmp==0) return 0;
-        return ord1==0 ? nullCmp : -nullCmp;
-      }
-
+    public int compare(int slot1, int slot2) {
       if (readerGen[slot1] == readerGen[slot2]) {
+        int cmp = ords[slot1] - ords[slot2];
         if (cmp != 0) {
           return cmp;
         }
@@ -102,13 +90,14 @@
 
       final String val1 = values[slot1];
       final String val2 = values[slot2];
+
       if (val1 == null) {
         if (val2 == null) {
           return 0;
         }
-        return nullCmp;
+        return 1;
       } else if (val2 == null) {
-        return -nullCmp;
+        return -1;
       }
       return val1.compareTo(val2);
     }
@@ -116,27 +105,17 @@
     public int compareBottom(int doc) {
       assert bottomSlot != -1;
       int order = this.order[doc];
-      final int cmp = bottomOrd - order;
-      if (bottomOrd==0 || order==0) {
-        if (cmp==0) return 0;
-        return bottomOrd==0 ? nullCmp : -nullCmp;        
-      }
-
+      int ord = (order == 0) ? NULL_ORD : order;
+      final int cmp = bottomOrd - ord;
       if (cmp != 0) {
         return cmp;
       }
 
       final String val2 = lookup[order];
-      if (bottomValue == null) {
-        if (val2 == null) {
-          return 0;
-        }
-        // bottom wins
-        return nullCmp;
-      } else if (val2 == null) {
-        // doc wins
-        return -nullCmp;
-      }
+
+      // take care of the case where both vals are null
+      if (bottomValue == val2) return 0;
+ 
       return bottomValue.compareTo(val2);
     }
 
@@ -145,7 +124,8 @@
       int index = 0;
       String value = values[slot];
       if (value == null) {
-        ords[slot] = 0;
+        // should already be done
+        // ords[slot] = NULL_ORD;
         return;
       }
 
@@ -171,7 +151,7 @@
 
     public void copy(int slot, int doc) {
       final int ord = order[doc];
-      ords[slot] = ord;
+      ords[slot] = ord == 0 ? NULL_ORD : ord;
       assert ord >= 0;
       values[slot] = lookup[ord];
       readerGen[slot] = currentReaderGen;
@@ -196,14 +176,10 @@
       }
       bottomOrd = ords[bottom];
       assert bottomOrd >= 0;
-      assert bottomOrd < lookup.length;
+      // assert bottomOrd < lookup.length;
       bottomValue = values[bottom];
     }
 
-    public int sortType() {
-      return SortField.STRING;
-    }
-
     public Comparable value(int slot) {
       Comparable v = values[slot];
       return v==null ? nullVal : v;
@@ -220,4 +196,4 @@
     public String getField() {
       return field;
     }
-  }
\ No newline at end of file
+  }

Added: lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java?rev=911245&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java Thu Feb 18 01:46:51 2010
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search;
+
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.*;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.solr.util.AbstractSolrTestCase;
+
+import java.io.IOException;
+import java.util.*;
+
+public class TestSort extends AbstractSolrTestCase {
+  public String getSchemaFile() { return null; }
+  public String getSolrConfigFile() { return null; }
+
+  Random r = new Random();
+
+  int ndocs = 77;
+  int iter = 100;  
+  int qiter = 1000;
+  int commitCount = ndocs/5 + 1;
+  int maxval = ndocs*2;
+
+  static class MyDoc {
+    int doc;
+    String val;
+  }
+
+  public void testSort() throws Exception {
+    RAMDirectory dir = new RAMDirectory();
+    Document smallDoc = new Document();
+    // Field id = new Field("id","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
+    Field f = new Field("f","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
+    smallDoc.add(f);
+
+    Document emptyDoc = new Document();
+
+    for (int iterCnt = 0; iterCnt<iter; iterCnt++) {
+      IndexWriter iw = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+      final MyDoc[] mydocs = new MyDoc[ndocs];
+
+      int commitCountdown = commitCount;
+      for (int i=0; i< ndocs; i++) {
+        Document doc;
+        MyDoc mydoc = new MyDoc();
+        mydoc.doc = i;
+        mydocs[i] = mydoc;
+
+        if (r.nextInt(3)==0) {
+          doc = emptyDoc;
+          mydoc.val = null;
+        } else {
+          mydoc.val = Integer.toString(r.nextInt(maxval));
+          f.setValue(mydoc.val);
+          doc = smallDoc;
+        }
+        iw.addDocument(doc);
+        if (--commitCountdown <= 0) {
+          commitCountdown = commitCount;
+          iw.commit();
+        }
+      }
+      iw.close();
+
+      /***
+      Arrays.sort(mydocs, new Comparator<MyDoc>() {
+        public int compare(MyDoc o1, MyDoc o2) {
+          String v1 = o1.val==null ? "zzz" : o1.val;
+          String v2 = o2.val==null ? "zzz" : o2.val;
+          int cmp = v1.compareTo(v2);
+          cmp = cmp==0 ? o1.doc-o2.doc : cmp;
+          return cmp;
+        }
+      });
+      ***/
+
+      IndexSearcher searcher = new IndexSearcher(dir, true);
+      // System.out.println("segments="+searcher.getIndexReader().getSequentialSubReaders().length);
+      assertTrue(searcher.getIndexReader().getSequentialSubReaders().length > 1);
+
+      for (int i=0; i<qiter; i++) {
+        Filter filt = new Filter() {
+          @Override
+          public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+            return randSet(reader.maxDoc());
+          }
+        };
+
+        int top = r.nextInt((ndocs>>3)+1)+1;
+        final boolean sortMissingLast = r.nextBoolean();
+        final boolean reverse = !sortMissingLast;
+        List<SortField> sfields = new ArrayList<SortField>();
+
+        if (r.nextBoolean()) sfields.add( new SortField(null, SortField.SCORE));
+        // hit both use-cases of sort-missing-last
+        sfields.add( Sorting.getStringSortField("f", reverse, sortMissingLast, !sortMissingLast) );
+        int sortIdx = sfields.size() - 1;
+        if (r.nextBoolean()) sfields.add( new SortField(null, SortField.SCORE));
+
+        Sort sort = new Sort(sfields.toArray(new SortField[sfields.size()]));
+
+        // final String nullRep = sortMissingLast ? "zzz" : "";
+        final String nullRep = "zzz";
+
+        boolean trackScores = r.nextBoolean();
+        boolean trackMaxScores = r.nextBoolean();
+        boolean scoreInOrder = r.nextBoolean();
+        final TopFieldCollector topCollector = TopFieldCollector.create(sort, top, true, trackScores, trackMaxScores, scoreInOrder);
+
+        final List<MyDoc> collectedDocs = new ArrayList<MyDoc>();
+        // delegate and collect docs ourselves
+        Collector myCollector = new Collector() {
+          int docBase;
+
+          @Override
+          public void setScorer(Scorer scorer) throws IOException {
+            topCollector.setScorer(scorer);
+          }
+
+          @Override
+          public void collect(int doc) throws IOException {
+            topCollector.collect(doc);
+            collectedDocs.add(mydocs[doc + docBase]);
+          }
+
+          @Override
+          public void setNextReader(IndexReader reader, int docBase) throws IOException {
+            topCollector.setNextReader(reader,docBase);
+            this.docBase = docBase;
+          }
+
+          @Override
+          public boolean acceptsDocsOutOfOrder() {
+            return topCollector.acceptsDocsOutOfOrder();
+          }
+        };
+
+        searcher.search(new MatchAllDocsQuery(), filt, myCollector);
+
+        Collections.sort(collectedDocs, new Comparator<MyDoc>() {
+          public int compare(MyDoc o1, MyDoc o2) {
+            String v1 = o1.val==null ? nullRep : o1.val;
+            String v2 = o2.val==null ? nullRep : o2.val;
+            int cmp = v1.compareTo(v2);
+            if (reverse) cmp = -cmp;
+            cmp = cmp==0 ? o1.doc-o2.doc : cmp;
+            return cmp;
+          }
+        });
+
+
+        TopDocs topDocs = topCollector.topDocs();
+        ScoreDoc[] sdocs = topDocs.scoreDocs;
+        for (int j=0; j<sdocs.length; j++) {
+          int id = sdocs[j].doc;
+          String s = (String)((FieldDoc)sdocs[j]).fields[sortIdx];
+          if (id != collectedDocs.get(j).doc) {
+            System.out.println("Error at pos " + j);
+          }
+          assertEquals(id, collectedDocs.get(j).doc);
+        }
+      }
+    }
+
+  }
+
+  public DocIdSet randSet(int sz) {
+    OpenBitSet obs = new OpenBitSet(sz);
+    int n = r.nextInt(sz);
+    for (int i=0; i<n; i++) {
+      obs.fastSet(r.nextInt(sz));
+    }
+    return obs;
+  }  
+  
+
+}

Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL