You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/12/21 19:44:32 UTC

[3/3] lucene-solr:branch_6x: LUCENE-7600: Simplify DocIdMerger.

LUCENE-7600: Simplify DocIdMerger.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c8c395db
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c8c395db
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c8c395db

Branch: refs/heads/branch_6x
Commit: c8c395db492840da778f93949798368dcde85c92
Parents: 10f1964
Author: Adrien Grand <jp...@gmail.com>
Authored: Wed Dec 21 19:34:19 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Wed Dec 21 20:15:31 2016 +0100

----------------------------------------------------------------------
 .../apache/lucene/codecs/DocValuesConsumer.java |  14 +-
 .../org/apache/lucene/codecs/NormsConsumer.java |   2 +-
 .../lucene/codecs/StoredFieldsWriter.java       |   2 +-
 .../apache/lucene/codecs/TermVectorsWriter.java |   2 +-
 .../org/apache/lucene/index/DocIDMerger.java    | 213 ++++++++++---------
 .../lucene/index/MappingMultiPostingsEnum.java  |   2 +-
 .../apache/lucene/index/TestDocIDMerger.java    |   4 +-
 7 files changed, 131 insertions(+), 108 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index 1fe4d2e..e981757 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -293,7 +293,7 @@ public abstract class DocValuesConsumer implements Closeable {
                           subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i]));
                         }
 
-                        final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+                        final DocIDMerger<NumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
 
                         return new Iterator<Number>() {
                           long nextValue;
@@ -380,7 +380,7 @@ public abstract class DocValuesConsumer implements Closeable {
                          subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], toMerge.get(i), docsWithField.get(i), mergeState.maxDocs[i]));
                        }
 
-                       final DocIDMerger<BinaryDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+                       final DocIDMerger<BinaryDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
 
                        return new Iterator<BytesRef>() {
                          BytesRef nextValue;
@@ -480,7 +480,7 @@ public abstract class DocValuesConsumer implements Closeable {
               subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
             }
 
-            final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+            final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
 
             return new Iterator<Number>() {
               int nextValue;
@@ -531,7 +531,7 @@ public abstract class DocValuesConsumer implements Closeable {
               subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge2.get(i), mergeState.maxDocs[i]));
             }
 
-            final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+            final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
 
             return new Iterator<Number>() {
               long nextValue;
@@ -693,7 +693,7 @@ public abstract class DocValuesConsumer implements Closeable {
               subs.add(new SortedDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
             }
 
-            final DocIDMerger<SortedDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+            final DocIDMerger<SortedDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
 
             return new Iterator<Number>() {
               int nextValue;
@@ -852,7 +852,7 @@ public abstract class DocValuesConsumer implements Closeable {
               subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
             }
 
-            final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+            final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
 
             return new Iterator<Number>() {
               int nextValue;
@@ -910,7 +910,7 @@ public abstract class DocValuesConsumer implements Closeable {
               subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i], map.getGlobalOrds(i)));
             }
 
-            final DocIDMerger<SortedSetDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
+            final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.segmentInfo.getIndexSort() != null);
 
             return new Iterator<Number>() {
               long nextValue;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
index 7595a71..0358ec4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java
@@ -143,7 +143,7 @@ public abstract class NormsConsumer implements Closeable {
                           subs.add(new NumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
                         }
 
-                        final DocIDMerger<NumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+                        final DocIDMerger<NumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
 
                         return new Iterator<Number>() {
                           long nextValue;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
index 80a9c49..0540f4f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
@@ -117,7 +117,7 @@ public abstract class StoredFieldsWriter implements Closeable {
       subs.add(new StoredFieldsMergeSub(new MergeVisitor(mergeState, i), mergeState.docMaps[i], storedFieldsReader, mergeState.maxDocs[i]));
     }
 
-    final DocIDMerger<StoredFieldsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+    final DocIDMerger<StoredFieldsMergeSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
 
     int docCount = 0;
     while (true) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
index c8ad9f6..b84065a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
@@ -205,7 +205,7 @@ public abstract class TermVectorsWriter implements Closeable {
       subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i]));
     }
 
-    final DocIDMerger<TermVectorsMergeSub> docIDMerger = new DocIDMerger<>(subs, mergeState.needsIndexSort);
+    final DocIDMerger<TermVectorsMergeSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
 
     int docCount = 0;
     while (true) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
index 0fb0e3b..6dcc596 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
@@ -28,17 +28,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
  *  concatenated (unsorted) order, or by a specified index-time sort, skipping
  *  deleted documents and remapping non-deleted documents. */
 
-public class DocIDMerger<T extends DocIDMerger.Sub> {
-
-  private final List<T> subs;
-
-  // Used when indexSort != null:
-  private final PriorityQueue<T> queue;
-  private boolean first;
-
-  // Used when indexIsSorted
-  private T current;
-  private int nextIndex;
+public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
 
   /** Represents one sub-reader being merged */
   public static abstract class Sub {
@@ -57,10 +47,84 @@ public class DocIDMerger<T extends DocIDMerger.Sub> {
   }
 
   /** Construct this from the provided subs, specifying the maximum sub count */
-  public DocIDMerger(List<T> subs, int maxCount, boolean indexIsSorted) {
-    this.subs = subs;
-
+  public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, int maxCount, boolean indexIsSorted) {
     if (indexIsSorted && maxCount > 1) {
+      return new SortedDocIDMerger<>(subs, maxCount);
+    } else {
+      return new SequentialDocIDMerger<>(subs);
+    }
+  }
+
+  /** Construct this from the provided subs */
+  public static <T extends DocIDMerger.Sub> DocIDMerger<T> of(List<T> subs, boolean indexIsSorted) {
+    return of(subs, subs.size(), indexIsSorted);
+  }
+
+  /** Reuse API, currently only used by postings during merge */
+  public abstract void reset();
+
+  /** Returns null when done */
+  public abstract T next();
+
+  private DocIDMerger() {}
+
+  private static class SequentialDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {
+
+    private final List<T> subs;
+    private T current;
+    private int nextIndex;
+
+    private SequentialDocIDMerger(List<T> subs) {
+      this.subs = subs;
+      reset();
+    }
+
+    @Override
+    public void reset() {
+      if (subs.size() > 0) {
+        current = subs.get(0);
+        nextIndex = 1;
+      } else {
+        current = null;
+        nextIndex = 0;
+      }
+    }
+
+    @Override
+    public T next() {
+      if (current == null) {
+        // NOTE: it's annoying that caller is allowed to call us again even after we returned null before
+        return null;
+      }
+      while (true) {
+        int docID = current.nextDoc();
+        if (docID == NO_MORE_DOCS) {
+          if (nextIndex == subs.size()) {
+            current = null;
+            return null;
+          }
+          current = subs.get(nextIndex);
+          nextIndex++;
+          continue;
+        }
+
+        int mappedDocID = current.docMap.get(docID);
+        if (mappedDocID != -1) {
+          current.mappedDocID = mappedDocID;
+          return current;
+        }
+      }
+    }
+
+  }
+
+  private static class SortedDocIDMerger<T extends DocIDMerger.Sub> extends DocIDMerger<T> {
+
+    private final List<T> subs;
+    private final PriorityQueue<T> queue;
+
+    private SortedDocIDMerger(List<T> subs, int maxCount) {
+      this.subs = subs;
       queue = new PriorityQueue<T>(maxCount) {
         @Override
         protected boolean lessThan(Sub a, Sub b) {
@@ -68,112 +132,71 @@ public class DocIDMerger<T extends DocIDMerger.Sub> {
           return a.mappedDocID < b.mappedDocID;
         }
       };
-    } else {
-      // We simply concatentate
-      queue = null;
+      reset();
     }
 
-    reset();
-  }
-
-  /** Construct this from the provided subs */
-  public DocIDMerger(List<T> subs, boolean indexIsSorted) {
-    this(subs, subs.size(), indexIsSorted);
-  }
-
-  /** Reuse API, currently only used by postings during merge */
-  public void reset() {
-    if (queue != null) {
+    @Override
+    public void reset() {
       // caller may not have fully consumed the queue:
       queue.clear();
+      boolean first = true;
       for(T sub : subs) {
-        while (true) {
-          int docID = sub.nextDoc();
-          if (docID == NO_MORE_DOCS) {
-            // all docs in this sub were deleted; do not add it to the queue!
-            break;
+        if (first) {
+          // by setting mappedDocID = -1, this entry is guaranteed to be the top of the queue
+          // so the first call to next() will advance it
+          sub.mappedDocID = -1;
+          first = false;
+        } else {
+          int mappedDocID;
+          while (true) {
+            int docID = sub.nextDoc();
+            if (docID == NO_MORE_DOCS) {
+              mappedDocID = NO_MORE_DOCS;
+              break;
+            }
+            mappedDocID = sub.docMap.get(docID);
+            if (mappedDocID != -1) {
+              break;
+            }
           }
-
-          int mappedDocID = sub.docMap.get(docID);
-          if (mappedDocID == -1) {
-            // doc was deleted
+          if (mappedDocID == NO_MORE_DOCS) {
+            // all docs in this sub were deleted; do not add it to the queue!
             continue;
-          } else {
-            sub.mappedDocID = mappedDocID;
-            queue.add(sub);
-            break;
           }
+          sub.mappedDocID = mappedDocID;
         }
+        queue.add(sub);
       }
-      first = true;
-    } else if (subs.size() > 0) {
-      current = subs.get(0);
-      nextIndex = 1;
-    } else {
-      current = null;
-      nextIndex = 0;
     }
-  }
 
-  /** Returns null when done */
-  public T next() {
-    // Loop until we find a non-deleted document
-    if (queue != null) {
+    @Override
+    public T next() {
       T top = queue.top();
       if (top == null) {
         // NOTE: it's annoying that caller is allowed to call us again even after we returned null before
         return null;
       }
 
-      if (first == false) {
-        while (true) {
-          int docID = top.nextDoc();
-          if (docID == NO_MORE_DOCS) {
-            queue.pop();
-            top = queue.top();
-            break;
-          }
-          int mappedDocID = top.docMap.get(docID);
-          if (mappedDocID == -1) {
-            // doc was deleted
-            continue;
-          } else {
-            top.mappedDocID = mappedDocID;
-            top = queue.updateTop();
-            break;
-          }
-        }
-      }
-
-      first = false;
-
-      return top;
-
-    } else {
       while (true) {
-        if (current == null) {
-          // NOTE: it's annoying that caller is allowed to call us again even after we returned null before
-          return null;
-        }
-        int docID = current.nextDoc();
+        int docID = top.nextDoc();
         if (docID == NO_MORE_DOCS) {
-          if (nextIndex == subs.size()) {
-            current = null;
-            return null;
-          }
-          current = subs.get(nextIndex);
-          nextIndex++;
-          continue;
+          queue.pop();
+          top = queue.top();
+          break;
         }
-        int mappedDocID = current.docMap.get(docID);
+        int mappedDocID = top.docMap.get(docID);
         if (mappedDocID == -1) {
-          // doc is deleted
+          // doc was deleted
           continue;
+        } else {
+          top.mappedDocID = mappedDocID;
+          top = queue.updateTop();
+          break;
         }
-
-        current.mappedDocID = mappedDocID;
-        return current;
       }
+
+      return top;
     }
   }
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
index d93c771..6672d64 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
@@ -62,7 +62,7 @@ final class MappingMultiPostingsEnum extends PostingsEnum {
     for(int i=0;i<allSubs.length;i++) {
       allSubs[i] = new MappingPostingsSub(mergeState.docMaps[i]);
     }
-    this.docIDMerger = new DocIDMerger<MappingPostingsSub>(subs, allSubs.length, mergeState.needsIndexSort);
+    this.docIDMerger = DocIDMerger.of(subs, allSubs.length, mergeState.needsIndexSort);
   }
 
   MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c395db/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
index d957646..5e19bbb 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDocIDMerger.java
@@ -70,7 +70,7 @@ public class TestDocIDMerger extends LuceneTestCase {
       valueStart += maxDoc;
     }
 
-    DocIDMerger<TestSubUnsorted> merger = new DocIDMerger<>(subs, false);
+    DocIDMerger<TestSubUnsorted> merger = DocIDMerger.of(subs, false);
 
     int count = 0;
     while (true) {
@@ -175,7 +175,7 @@ public class TestDocIDMerger extends LuceneTestCase {
         }, docMap.length, i));
     }
 
-    DocIDMerger<TestSubSorted> merger = new DocIDMerger<>(subs, true);
+    DocIDMerger<TestSubSorted> merger = DocIDMerger.of(subs, true);
 
     int count = 0;
     while (true) {