You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/07/13 12:00:05 UTC
svn commit: r963654 - in /lucene/dev/trunk/lucene: CHANGES.txt
src/java/org/apache/lucene/search/FieldComparator.java
Author: mikemccand
Date: Tue Jul 13 10:00:05 2010
New Revision: 963654
URL: http://svn.apache.org/viewvc?rev=963654&view=rev
Log:
LUCENE-2531: fix string sort to only compare-by-value when necessary
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldComparator.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=963654&r1=963653&r2=963654&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Jul 13 10:00:05 2010
@@ -197,6 +197,10 @@ Optimizations
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
(Mike McCandless)
+
+* LUCENE-2531: Fix issue when sorting by a String field that was
+ causing too many fallbacks to compare-by-value (instead of by-ord).
+ (Mike McCandless)
======================= Lucene 3.x (not yet released) =======================
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldComparator.java?rev=963654&r1=963653&r2=963654&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldComparator.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldComparator.java Tue Jul 13 10:00:05 2010
@@ -31,6 +31,7 @@ import org.apache.lucene.search.FieldCac
import org.apache.lucene.search.FieldCache.DocTermsIndex;
import org.apache.lucene.search.FieldCache.DocTerms;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.PackedInts;
/**
* Expert: a FieldComparator compares hits so as to determine their
@@ -709,23 +710,21 @@ public abstract class FieldComparator {
private final BytesRef[] values;
private final int[] readerGen;
+ private PackedInts.Reader currentDocToOrd;
private int currentReaderGen = -1;
private DocTermsIndex termsIndex;
private final String field;
private int bottomSlot = -1;
private int bottomOrd;
+ private boolean bottomSameReader;
private BytesRef bottomValue;
- private final boolean reversed;
- private final int sortPos;
private final BytesRef tempBR = new BytesRef();
public TermOrdValComparator(int numHits, String field, int sortPos, boolean reversed) {
ords = new int[numHits];
values = new BytesRef[numHits];
readerGen = new int[numHits];
- this.sortPos = sortPos;
- this.reversed = reversed;
this.field = field;
}
@@ -754,59 +753,38 @@ public abstract class FieldComparator {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
- int order = termsIndex.getOrd(doc);
- final int cmp = bottomOrd - order;
- if (cmp != 0) {
- return cmp;
- }
-
- if (bottomValue == null) {
- if (order == 0) {
- // unset
- return 0;
+ if (bottomSameReader) {
+ // ord is precisely comparable, even in the equal case
+ return bottomOrd - (int) currentDocToOrd.get(doc);
+ } else {
+ // ord is only approx comparable: if they are not
+ // equal, we can use that; if they are equal, we
+ // must fallback to compare by value
+ final int order = (int) currentDocToOrd.get(doc);
+ final int cmp = bottomOrd - order;
+ if (cmp != 0) {
+ return cmp;
}
- // bottom wins
- return -1;
- } else if (order == 0) {
- // doc wins
- return 1;
- }
- termsIndex.lookup(order, tempBR);
- return bottomValue.compareTo(tempBR);
- }
- private void convert(int slot) {
- readerGen[slot] = currentReaderGen;
- int index = 0;
- BytesRef value = values[slot];
- if (value == null) {
- // 0 ord is null for all segments
- assert ords[slot] == 0;
- return;
- }
-
- if (sortPos == 0 && bottomSlot != -1 && bottomSlot != slot) {
- // Since we are the primary sort, the entries in the
- // queue are bounded by bottomOrd:
- if (reversed) {
- index = binarySearch(tempBR, termsIndex, value, bottomOrd, termsIndex.numOrd()-1);
- } else {
- index = binarySearch(tempBR, termsIndex, value, 0, bottomOrd);
+ if (bottomValue == null) {
+ if (order == 0) {
+ // unset
+ return 0;
+ }
+ // bottom wins
+ return -1;
+ } else if (order == 0) {
+ // doc wins
+ return 1;
}
- } else {
- // Full binary search
- index = binarySearch(tempBR, termsIndex, value);
- }
-
- if (index < 0) {
- index = -index - 2;
+ termsIndex.lookup(order, tempBR);
+ return bottomValue.compareTo(tempBR);
}
- ords[slot] = index;
}
@Override
public void copy(int slot, int doc) {
- final int ord = termsIndex.getOrd(doc);
+ final int ord = (int) currentDocToOrd.get(doc);
if (ord == 0) {
values[slot] = null;
} else {
@@ -823,21 +801,34 @@ public abstract class FieldComparator {
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field);
+ currentDocToOrd = termsIndex.getDocToOrd();
currentReaderGen++;
if (bottomSlot != -1) {
- convert(bottomSlot);
- bottomOrd = ords[bottomSlot];
+ setBottom(bottomSlot);
}
}
@Override
public void setBottom(final int bottom) {
bottomSlot = bottom;
- if (readerGen[bottom] != currentReaderGen) {
- convert(bottomSlot);
+
+ bottomValue = values[bottomSlot];
+ if (bottomValue == null) {
+ // 0 ord is null for all segments
+ assert ords[bottomSlot] == 0;
+ bottomOrd = 0;
+ bottomSameReader = true;
+ } else {
+ final int index = binarySearch(tempBR, termsIndex, bottomValue);
+ if (index < 0) {
+ bottomOrd = -index - 2;
+ bottomSameReader = false;
+ } else {
+ bottomOrd = index;
+ // exact value match
+ bottomSameReader = true;
+ }
}
- bottomOrd = ords[bottom];
- bottomValue = values[bottom];
}
@Override