You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/12/21 19:44:32 UTC
[3/3] lucene-solr:branch_6x: LUCENE-7600: Simplify DocIdMerger.
LUCENE-7600: Simplify DocIdMerger.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c8c395db
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c8c395db
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c8c395db
Branch: refs/heads/branch_6x
Commit: c8c395db492840da778f93949798368dcde85c92
Parents: 10f1964
Author: Adrien Grand <jp...@gmail.com>
Authored: Wed Dec 21 19:34:19 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Wed Dec 21 20:15:31 2016 +0100
----------------------------------------------------------------------
.../apache/lucene/codecs/DocValuesConsumer.java | 14 +-
.../org/apache/lucene/codecs/NormsConsumer.java | 2 +-
.../lucene/codecs/StoredFieldsWriter.java | 2 +-
.../apache/lucene/codecs/TermVectorsWriter.java | 2 +-
.../org/apache/lucene/index/DocIDMerger.java | 213 ++++++++++---------
.../lucene/index/MappingMultiPostingsEnum.java | 2 +-
.../apache/lucene/index/TestDocIDMerger.java | 4 +-
7 files changed, 131 insertions(+), 108 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index 1fe4d2e..e981757 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -293,7 +293,7 @@ public abstract class DocValuesConsumer implements Closeable {
subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i]));
}
- final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+ final DocIDMerger<NumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
return new Iterator<Number>() {
long nextValue;
@@ -380,7 +380,7 @@ public abstract class DocValuesConsumer implements Closeable {
subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i]));
}
- final DocIDMerger<BinaryDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+ final DocIDMerger<BinaryDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
return new Iterator<BytesRef>() {
BytesRef nextValue;
@@ -480,7 +480,7 @@ public abstract class DocValuesConsumer implements Closeable {
subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
}
- final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+ final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
return new Iterator<Number>() {
int nextValue;
@@ -531,7 +531,7 @@ public abstract class DocValuesConsumer implements Closeable {
subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge2.get(i), mergeState.maxDocs[i]));
}
- final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+ final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
return new Iterator<Number>() {
long nextValue;
@@ -693,7 +693,7 @@ public abstract class DocValuesConsumer implements Closeable {
subs.add(new SortedDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
}
- final DocIDMerger<SortedDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+ final DocIDMerger<SortedDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
return new Iterator<Number>() {
int nextValue;
@@ -852,7 +852,7 @@ public abstract class DocValuesConsumer implements Closeable {
subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
}
- final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+ final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
return new Iterator<Number>() {
int nextValue;
@@ -910,7 +910,7 @@ public abstract class DocValuesConsumer implements Closeable {
subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
}
- final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+ final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.segmentInfo.getIndexSort() != null);
return new Iterator<Number>() {
long nextValue;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
index 7595a71..0358ec4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
@@ -143,7 +143,7 @@ public abstract class NormsConsumer implements Closeable {
subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
}
- final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+ final DocIDMerger<NumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
return new Iterator<Number>() {
long nextValue;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
index 80a9c49..0540f4f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
@@ -117,7 +117,7 @@ public abstract class StoredFieldsWriter implements Closeable {
subs.add(new StoredFieldsMergeSub(new MergeVisitor(mergeState, i), mergeState.docMaps[i], storedFieldsReader, mergeState.maxDocs[i]));
}
- final DocIDMerger<StoredFieldsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+ final DocIDMerger<StoredFieldsMergeSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
int docCount = 0;
while (true) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
index c8ad9f6..b84065a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
@@ -205,7 +205,7 @@ public abstract class TermVectorsWriter implements Closeable {
subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i]));
}
- final DocIDMerger<TermVectorsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+ final DocIDMerger<TermVectorsMergeSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
int docCount = 0;
while (true) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
index 0fb0e3b..6dcc596 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
@@ -28,17 +28,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
* concatenated (unsorted) order, or by a specified index-time sort, skipping
* deleted documents and remapping non-deleted documents. */
-public class DocIDMerger<T extends DocIDMerger.Sub> {
-
- private final List<T> subs;
-
- // Used when indexSort != null:
- private final PriorityQueue<T> queue;
- private boolean first;
-
- // Used when indexIsSorted
- private T current;
- private int nextIndex;
+public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
/** Represents one sub-reader being merged */
public static abstract class Sub {
@@ -57,10 +47,84 @@ public class DocIDMerger<T extends DocIDMerger.Sub> {
}
/** Construct this from the provided subs, specifying the maximum sub count */
- public DocIDMerger(List<T> subs, int maxCount, boolean indexIsSorted) {
- this.subs = subs;
-
+ public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, int maxCount, boolean indexIsSorted) {
if (indexIsSorted && maxCount > 1) {
+ return new SortedDocIDMerger<>(subs, maxCount);
+ } else {
+ return new SequentialDocIDMerger<>(subs);
+ }
+ }
+
+ /** Construct this from the provided subs */
+ public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, boolean indexIsSorted) {
+ return of(subs, subs.size(), indexIsSorted);
+ }
+
+ /** Reuse API, currently only used by postings during merge */
+ public abstract void reset();
+
+ /** Returns null when done */
+ public abstract T next();
+
+ private DocIDMerger() {}
+
+ private static class SequentialDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {
+
+ private final List<T> subs;
+ private T current;
+ private int nextIndex;
+
+ private SequentialDocIDMerger(List<T> subs) {
+ this.subs = subs;
+ reset();
+ }
+
+ @Override
+ public void reset() {
+ if (subs.size() > 0) {
+ current = subs.get(0);
+ nextIndex = 1;
+ } else {
+ current = null;
+ nextIndex = 0;
+ }
+ }
+
+ @Override
+ public T next() {
+ if (current == null) {
+ // NOTE: it's annoying that caller is allowed to call us again even after we returned null before
+ return null;
+ }
+ while (true) {
+ int docID = current.nextDoc();
+ if (docID == NO_MORE_DOCS) {
+ if (nextIndex == subs.size()) {
+ current = null;
+ return null;
+ }
+ current = subs.get(nextIndex);
+ nextIndex++;
+ continue;
+ }
+
+ int mappedDocID = current.docMap.get(docID);
+ if (mappedDocID != -1) {
+ current.mappedDocID = mappedDocID;
+ return current;
+ }
+ }
+ }
+
+ }
+
+ private static class SortedDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {
+
+ private final List<T> subs;
+ private final PriorityQueue<T> queue;
+
+ private SortedDocIDMerger(List<T> subs, int maxCount) {
+ this.subs = subs;
queue = new PriorityQueue<T>(maxCount) {
@Override
protected boolean lessThan(Sub a, Sub b) {
@@ -68,112 +132,71 @@ public class DocIDMerger<T extends DocIDMerger.Sub> {
return a.mappedDocID < b.mappedDocID;
}
};
- } else {
- // We simply concatentate
- queue = null;
+ reset();
}
- reset();
- }
-
- /** Construct this from the provided subs */
- public DocIDMerger(List<T> subs, boolean indexIsSorted) {
- this(subs, subs.size(), indexIsSorted);
- }
-
- /** Reuse API, currently only used by postings during merge */
- public void reset() {
- if (queue != null) {
+ @Override
+ public void reset() {
// caller may not have fully consumed the queue:
queue.clear();
+ boolean first = true;
for(T sub : subs) {
- while (true) {
- int docID = sub.nextDoc();
- if (docID == NO_MORE_DOCS) {
- // all docs in this sub were deleted; do not add it to the queue!
- break;
+ if (first) {
+ // by setting mappedDocID = -1, this entry is guaranteed to be the top of the queue
+ // so the first call to next() will advance it
+ sub.mappedDocID = -1;
+ first = false;
+ } else {
+ int mappedDocID;
+ while (true) {
+ int docID = sub.nextDoc();
+ if (docID == NO_MORE_DOCS) {
+ mappedDocID = NO_MORE_DOCS;
+ break;
+ }
+ mappedDocID = sub.docMap.get(docID);
+ if (mappedDocID != -1) {
+ break;
+ }
}
-
- int mappedDocID = sub.docMap.get(docID);
- if (mappedDocID == -1) {
- // doc was deleted
+ if (mappedDocID == NO_MORE_DOCS) {
+ // all docs in this sub were deleted; do not add it to the queue!
continue;
- } else {
- sub.mappedDocID = mappedDocID;
- queue.add(sub);
- break;
}
+ sub.mappedDocID = mappedDocID;
}
+ queue.add(sub);
}
- first = true;
- } else if (subs.size() > 0) {
- current = subs.get(0);
- nextIndex = 1;
- } else {
- current = null;
- nextIndex = 0;
}
- }
- /** Returns null when done */
- public T next() {
- // Loop until we find a non-deleted document
- if (queue != null) {
+ @Override
+ public T next() {
T top = queue.top();
if (top == null) {
// NOTE: it's annoying that caller is allowed to call us again even after we returned null before
return null;
}
- if (first == false) {
- while (true) {
- int docID = top.nextDoc();
- if (docID == NO_MORE_DOCS) {
- queue.pop();
- top = queue.top();
- break;
- }
- int mappedDocID = top.docMap.get(docID);
- if (mappedDocID == -1) {
- // doc was deleted
- continue;
- } else {
- top.mappedDocID = mappedDocID;
- top = queue.updateTop();
- break;
- }
- }
- }
-
- first = false;
-
- return top;
-
- } else {
while (true) {
- if (current == null) {
- // NOTE: it's annoying that caller is allowed to call us again even after we returned null before
- return null;
- }
- int docID = current.nextDoc();
+ int docID = top.nextDoc();
if (docID == NO_MORE_DOCS) {
- if (nextIndex == subs.size()) {
- current = null;
- return null;
- }
- current = subs.get(nextIndex);
- nextIndex++;
- continue;
+ queue.pop();
+ top = queue.top();
+ break;
}
- int mappedDocID = current.docMap.get(docID);
+ int mappedDocID = top.docMap.get(docID);
if (mappedDocID == -1) {
- // doc is deleted
+ // doc was deleted
continue;
+ } else {
+ top.mappedDocID = mappedDocID;
+ top = queue.updateTop();
+ break;
}
-
- current.mappedDocID = mappedDocID;
- return current;
}
+
+ return top;
}
}
+
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
index d93c771..6672d64 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
@@ -62,7 +62,7 @@ final class MappingMultiPostingsEnum extends PostingsEnum {
for(int i=0;i<allSubs.length;i++) {
allSubs[i] = new MappingPostingsSub(mergeState.docMaps[i]);
}
- this.docIDMerger = new DocIDMerger<MappingPostingsSub>(subs, allSubs.length, mergeState.needsIndexSort);
+ this.docIDMerger = DocIDMerger.of(subs, allSubs.length, mergeState.needsIndexSort);
}
MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) throws IOException {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
index d957646..5e19bbb 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
@@ -70,7 +70,7 @@ public class TestDocIDMerger extends LuceneTestCase {
valueStart += maxDoc;
}
- DocIDMerger<TestSubUnsorted> merger = new DocIDMerger<>(subs, false);
+ DocIDMerger<TestSubUnsorted> merger = DocIDMerger.of(subs, false);
int count = 0;
while (true) {
@@ -175,7 +175,7 @@ public class TestDocIDMerger extends LuceneTestCase {
}, docMap.length, i));
}
- DocIDMerger<TestSubSorted> merger = new DocIDMerger<>(subs, true);
+ DocIDMerger<TestSubSorted> merger = DocIDMerger.of(subs, true);
int count = 0;
while (true) {