You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/07/22 13:28:17 UTC
svn commit: r1692255 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/CHANGES.txt lucene/core/
lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Author: jpountz
Date: Wed Jul 22 11:28:16 2015
New Revision: 1692255
URL: http://svn.apache.org/r1692255
Log:
LUCENE-6690: Speed up MultiTermsEnum.next().
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1692255&r1=1692254&r2=1692255&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Wed Jul 22 11:28:16 2015
@@ -317,6 +317,9 @@ Optimizations
in the case that there are few unique sets of values.
(Adrien Grand, Robert Muir)
+* LUCENE-6690: Sped up MultiTermsEnum.next() on high-cardinality fields.
+ (Adrien Grand)
+
Build
* LUCENE-6518: Don't report false thread leaks from IBM J9
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1692255&r1=1692254&r2=1692255&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java Wed Jul 22 11:28:16 2015
@@ -19,7 +19,9 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Comparator;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.PriorityQueue;
@@ -31,7 +33,14 @@ import org.apache.lucene.util.PriorityQu
* @lucene.experimental
*/
public final class MultiTermsEnum extends TermsEnum {
-
+
+ private static final Comparator<TermsEnumWithSlice> INDEX_COMPARATOR = new Comparator<TermsEnumWithSlice>() {
+ @Override
+ public int compare(TermsEnumWithSlice o1, TermsEnumWithSlice o2) {
+ return o1.index - o2.index;
+ }
+ };
+
private final TermMergeQueue queue;
private final TermsEnumWithSlice[] subs; // all of our subs (one per sub-reader)
private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field
@@ -213,12 +222,14 @@ public final class MultiTermsEnum extend
if (status == SeekStatus.FOUND) {
top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term();
+ queue.add(currentSubs[i]);
} else {
if (status == SeekStatus.NOT_FOUND) {
currentSubs[i].current = currentSubs[i].terms.term();
assert currentSubs[i].current != null;
queue.add(currentSubs[i]);
} else {
+ assert status == SeekStatus.END;
// enum exhausted
currentSubs[i].current = null;
}
@@ -253,23 +264,19 @@ public final class MultiTermsEnum extend
// extract all subs from the queue that have the same
// top term
assert numTop == 0;
- while(true) {
- top[numTop++] = queue.pop();
- if (queue.size() == 0 || !(queue.top()).current.bytesEquals(top[0].current)) {
- break;
- }
- }
+ numTop = queue.fillTop(top);
current = top[0].current;
}
private void pushTop() throws IOException {
- // call next() on each top, and put back into queue
- for(int i=0;i<numTop;i++) {
- top[i].current = top[i].terms.next();
- if (top[i].current != null) {
- queue.add(top[i]);
+ // call next() on each top, and reorder queue
+ for (int i = 0; i < numTop; i++) {
+ TermsEnumWithSlice top = queue.top();
+ top.current = top.terms.next();
+ if (top.current == null) {
+ queue.pop();
} else {
- // no more fields in this reader
+ queue.updateTop();
}
}
numTop = 0;
@@ -342,6 +349,8 @@ public final class MultiTermsEnum extend
int upto = 0;
+ ArrayUtil.timSort(top, 0, numTop, INDEX_COMPARATOR);
+
for(int i=0;i<numTop;i++) {
final TermsEnumWithSlice entry = top[i];
@@ -382,18 +391,47 @@ public final class MultiTermsEnum extend
}
private final static class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> {
+
+ final int[] stack;
+
TermMergeQueue(int size) {
super(size);
+ this.stack = new int[size];
}
@Override
protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
- final int cmp = termsA.current.compareTo(termsB.current);
- if (cmp != 0) {
- return cmp < 0;
- } else {
- return termsA.subSlice.start < termsB.subSlice.start;
+ return termsA.current.compareTo(termsB.current) < 0;
+ }
+
+ /** Add the {@link #top()} slice as well as all slices that are positionned
+ * on the same term to {@code tops} and return how many of them there are. */
+ int fillTop(TermsEnumWithSlice[] tops) {
+ final int size = size();
+ if (size == 0) {
+ return 0;
}
+ tops[0] = top();
+ int numTop = 1;
+ stack[0] = 1;
+ int stackLen = 1;
+
+ while (stackLen != 0) {
+ final int index = stack[--stackLen];
+ final int leftChild = index << 1;
+ for (int child = leftChild, end = Math.min(size, leftChild + 1); child <= end; ++child) {
+ TermsEnumWithSlice te = get(child);
+ if (te.current.equals(tops[0].current)) {
+ tops[numTop++] = te;
+ stack[stackLen++] = child;
+ }
+ }
+ }
+ return numTop;
+ }
+
+ private TermsEnumWithSlice get(int i) {
+ return (TermsEnumWithSlice) getHeapArray()[i];
}
}