You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2010/02/18 02:46:52 UTC
svn commit: r911245 - in /lucene/solr/trunk: CHANGES.txt
src/java/org/apache/solr/search/MissingStringLastComparatorSource.java
src/test/org/apache/solr/search/TestSort.java
Author: yonik
Date: Thu Feb 18 01:46:51 2010
New Revision: 911245
URL: http://svn.apache.org/viewvc?rev=911245&view=rev
Log:
SOLR-1777: fix sortMissingLast, sortMissingFirst
Added:
lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java (with props)
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=911245&r1=911244&r2=911245&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Thu Feb 18 01:46:51 2010
@@ -185,6 +185,10 @@
* SOLR-1579: Fixes to XML escaping in stats.jsp
(David Bowen and hossman)
+* SOLR-1777: fieldTypes with sortMissingLast=true or sortMissingFirst=true can
+ result in incorrectly sorted results. (yonik)
+
+
Other Changes
----------------------
Modified: lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java?rev=911245&r1=911244&r2=911245&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java Thu Feb 18 01:46:51 2010
@@ -43,18 +43,17 @@
}
public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
- return new MissingLastOrdComparator(numHits, fieldname, sortPos, reversed, true, missingValueProxy);
+ return new MissingLastOrdComparator(numHits, fieldname, sortPos, reversed, missingValueProxy);
}
}
+
// Copied from Lucene and modified since the Lucene version couldn't
// be extended or have it's values accessed.
-
-// NOTE: there were a number of other interesting String
-// comparators explored, but this one seemed to perform
-// best all around. See LUCENE-1483 for details.
-class MissingLastOrdComparator extends FieldComparator {
+ class MissingLastOrdComparator extends FieldComparator {
+ private static final int NULL_ORD = Integer.MAX_VALUE;
+ private final String nullVal;
private final int[] ords;
private final String[] values;
@@ -71,30 +70,19 @@
private final boolean reversed;
private final int sortPos;
- private final int nullCmp;
- private final Comparable nullVal;
-
- public MissingLastOrdComparator(int numHits, String field, int sortPos, boolean reversed, boolean sortMissingLast, Comparable nullVal) {
+ public MissingLastOrdComparator(int numHits, String field, int sortPos, boolean reversed, String nullVal) {
ords = new int[numHits];
values = new String[numHits];
readerGen = new int[numHits];
this.sortPos = sortPos;
this.reversed = reversed;
this.field = field;
- this.nullCmp = sortMissingLast ? 1 : -1;
this.nullVal = nullVal;
}
- public int compare(int slot1, int slot2) {
- int ord1 = ords[slot1];
- int ord2 = ords[slot2];
- int cmp = ord1-ord2;
- if (ord1==0 || ord2==0) {
- if (cmp==0) return 0;
- return ord1==0 ? nullCmp : -nullCmp;
- }
-
+ public int compare(int slot1, int slot2) {
if (readerGen[slot1] == readerGen[slot2]) {
+ int cmp = ords[slot1] - ords[slot2];
if (cmp != 0) {
return cmp;
}
@@ -102,13 +90,14 @@
final String val1 = values[slot1];
final String val2 = values[slot2];
+
if (val1 == null) {
if (val2 == null) {
return 0;
}
- return nullCmp;
+ return 1;
} else if (val2 == null) {
- return -nullCmp;
+ return -1;
}
return val1.compareTo(val2);
}
@@ -116,27 +105,17 @@
public int compareBottom(int doc) {
assert bottomSlot != -1;
int order = this.order[doc];
- final int cmp = bottomOrd - order;
- if (bottomOrd==0 || order==0) {
- if (cmp==0) return 0;
- return bottomOrd==0 ? nullCmp : -nullCmp;
- }
-
+ int ord = (order == 0) ? NULL_ORD : order;
+ final int cmp = bottomOrd - ord;
if (cmp != 0) {
return cmp;
}
final String val2 = lookup[order];
- if (bottomValue == null) {
- if (val2 == null) {
- return 0;
- }
- // bottom wins
- return nullCmp;
- } else if (val2 == null) {
- // doc wins
- return -nullCmp;
- }
+
+ // take care of the case where both vals are null
+ if (bottomValue == val2) return 0;
+
return bottomValue.compareTo(val2);
}
@@ -145,7 +124,8 @@
int index = 0;
String value = values[slot];
if (value == null) {
- ords[slot] = 0;
+ // should already be done
+ // ords[slot] = NULL_ORD;
return;
}
@@ -171,7 +151,7 @@
public void copy(int slot, int doc) {
final int ord = order[doc];
- ords[slot] = ord;
+ ords[slot] = ord == 0 ? NULL_ORD : ord;
assert ord >= 0;
values[slot] = lookup[ord];
readerGen[slot] = currentReaderGen;
@@ -196,14 +176,10 @@
}
bottomOrd = ords[bottom];
assert bottomOrd >= 0;
- assert bottomOrd < lookup.length;
+ // assert bottomOrd < lookup.length;
bottomValue = values[bottom];
}
- public int sortType() {
- return SortField.STRING;
- }
-
public Comparable value(int slot) {
Comparable v = values[slot];
return v==null ? nullVal : v;
@@ -220,4 +196,4 @@
public String getField() {
return field;
}
- }
\ No newline at end of file
+ }
Added: lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java?rev=911245&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java Thu Feb 18 01:46:51 2010
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search;
+
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.*;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.solr.util.AbstractSolrTestCase;
+
+import java.io.IOException;
+import java.util.*;
+
+public class TestSort extends AbstractSolrTestCase {
+ public String getSchemaFile() { return null; }
+ public String getSolrConfigFile() { return null; }
+
+ Random r = new Random();
+
+ int ndocs = 77;
+ int iter = 100;
+ int qiter = 1000;
+ int commitCount = ndocs/5 + 1;
+ int maxval = ndocs*2;
+
+ static class MyDoc {
+ int doc;
+ String val;
+ }
+
+ public void testSort() throws Exception {
+ RAMDirectory dir = new RAMDirectory();
+ Document smallDoc = new Document();
+ // Field id = new Field("id","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
+ Field f = new Field("f","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
+ smallDoc.add(f);
+
+ Document emptyDoc = new Document();
+
+ for (int iterCnt = 0; iterCnt<iter; iterCnt++) {
+ IndexWriter iw = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+ final MyDoc[] mydocs = new MyDoc[ndocs];
+
+ int commitCountdown = commitCount;
+ for (int i=0; i< ndocs; i++) {
+ Document doc;
+ MyDoc mydoc = new MyDoc();
+ mydoc.doc = i;
+ mydocs[i] = mydoc;
+
+ if (r.nextInt(3)==0) {
+ doc = emptyDoc;
+ mydoc.val = null;
+ } else {
+ mydoc.val = Integer.toString(r.nextInt(maxval));
+ f.setValue(mydoc.val);
+ doc = smallDoc;
+ }
+ iw.addDocument(doc);
+ if (--commitCountdown <= 0) {
+ commitCountdown = commitCount;
+ iw.commit();
+ }
+ }
+ iw.close();
+
+ /***
+ Arrays.sort(mydocs, new Comparator<MyDoc>() {
+ public int compare(MyDoc o1, MyDoc o2) {
+ String v1 = o1.val==null ? "zzz" : o1.val;
+ String v2 = o2.val==null ? "zzz" : o2.val;
+ int cmp = v1.compareTo(v2);
+ cmp = cmp==0 ? o1.doc-o2.doc : cmp;
+ return cmp;
+ }
+ });
+ ***/
+
+ IndexSearcher searcher = new IndexSearcher(dir, true);
+ // System.out.println("segments="+searcher.getIndexReader().getSequentialSubReaders().length);
+ assertTrue(searcher.getIndexReader().getSequentialSubReaders().length > 1);
+
+ for (int i=0; i<qiter; i++) {
+ Filter filt = new Filter() {
+ @Override
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ return randSet(reader.maxDoc());
+ }
+ };
+
+ int top = r.nextInt((ndocs>>3)+1)+1;
+ final boolean sortMissingLast = r.nextBoolean();
+ final boolean reverse = !sortMissingLast;
+ List<SortField> sfields = new ArrayList<SortField>();
+
+ if (r.nextBoolean()) sfields.add( new SortField(null, SortField.SCORE));
+ // hit both use-cases of sort-missing-last
+ sfields.add( Sorting.getStringSortField("f", reverse, sortMissingLast, !sortMissingLast) );
+ int sortIdx = sfields.size() - 1;
+ if (r.nextBoolean()) sfields.add( new SortField(null, SortField.SCORE));
+
+ Sort sort = new Sort(sfields.toArray(new SortField[sfields.size()]));
+
+ // final String nullRep = sortMissingLast ? "zzz" : "";
+ final String nullRep = "zzz";
+
+ boolean trackScores = r.nextBoolean();
+ boolean trackMaxScores = r.nextBoolean();
+ boolean scoreInOrder = r.nextBoolean();
+ final TopFieldCollector topCollector = TopFieldCollector.create(sort, top, true, trackScores, trackMaxScores, scoreInOrder);
+
+ final List<MyDoc> collectedDocs = new ArrayList<MyDoc>();
+ // delegate and collect docs ourselves
+ Collector myCollector = new Collector() {
+ int docBase;
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ topCollector.setScorer(scorer);
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ topCollector.collect(doc);
+ collectedDocs.add(mydocs[doc + docBase]);
+ }
+
+ @Override
+ public void setNextReader(IndexReader reader, int docBase) throws IOException {
+ topCollector.setNextReader(reader,docBase);
+ this.docBase = docBase;
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return topCollector.acceptsDocsOutOfOrder();
+ }
+ };
+
+ searcher.search(new MatchAllDocsQuery(), filt, myCollector);
+
+ Collections.sort(collectedDocs, new Comparator<MyDoc>() {
+ public int compare(MyDoc o1, MyDoc o2) {
+ String v1 = o1.val==null ? nullRep : o1.val;
+ String v2 = o2.val==null ? nullRep : o2.val;
+ int cmp = v1.compareTo(v2);
+ if (reverse) cmp = -cmp;
+ cmp = cmp==0 ? o1.doc-o2.doc : cmp;
+ return cmp;
+ }
+ });
+
+
+ TopDocs topDocs = topCollector.topDocs();
+ ScoreDoc[] sdocs = topDocs.scoreDocs;
+ for (int j=0; j<sdocs.length; j++) {
+ int id = sdocs[j].doc;
+ String s = (String)((FieldDoc)sdocs[j]).fields[sortIdx];
+ if (id != collectedDocs.get(j).doc) {
+ System.out.println("Error at pos " + j);
+ }
+ assertEquals(id, collectedDocs.get(j).doc);
+ }
+ }
+ }
+
+ }
+
+ public DocIdSet randSet(int sz) {
+ OpenBitSet obs = new OpenBitSet(sz);
+ int n = r.nextInt(sz);
+ for (int i=0; i<n; i++) {
+ obs.fastSet(r.nextInt(sz));
+ }
+ return obs;
+ }
+
+
+}
Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestSort.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL