You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/02/22 00:57:07 UTC
svn commit: r1448861 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/CHANGES.txt lucene/core/
lucene/core/src/java/org/apache/lucene/index/MergeState.java
lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
Author: jpountz
Date: Thu Feb 21 23:57:07 2013
New Revision: 1448861
URL: http://svn.apache.org/r1448861
Log:
LUCENE-4792: Reduction of the memory required to build the doc ID maps used when merging segments (merged from r1448853).
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1448861&r1=1448860&r2=1448861&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Feb 21 23:57:07 2013
@@ -97,6 +97,9 @@ Optimizations
table-compression, etc), and memory addresses use MonotonicBlockPackedWriter.
(Simon Willnauer, Adrien Grand, Mike McCandless, Robert Muir)
+* LUCENE-4792: Reduction of the memory required to build the doc ID maps used
+ when merging segments. (Adrien Grand)
+
New Features
* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java?rev=1448861&r1=1448860&r2=1448861&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MergeState.java Thu Feb 21 23:57:07 2013
@@ -22,7 +22,7 @@ import java.util.List;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
-import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
/** Holds common state used during segment merging.
*
@@ -33,86 +33,73 @@ public class MergeState {
* Remaps docids around deletes during merge
*/
public static abstract class DocMap {
- private final Bits liveDocs;
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected DocMap(Bits liveDocs) {
- this.liveDocs = liveDocs;
+ DocMap() {}
+
+ /** Returns the mapped docID corresponding to the provided one. */
+ public abstract int get(int docID);
+
+ /** Returns the total number of documents, ignoring
+ * deletions. */
+ public abstract int maxDoc();
+
+ /** Returns the number of not-deleted documents. */
+ public final int numDocs() {
+ return maxDoc() - numDeletedDocs();
+ }
+
+ /** Returns the number of deleted documents. */
+ public abstract int numDeletedDocs();
+
+ /** Returns true if there are any deletions. */
+ public boolean hasDeletions() {
+ return numDeletedDocs() > 0;
}
/** Creates a {@link DocMap} instance appropriate for
* this reader. */
public static DocMap build(AtomicReader reader) {
final int maxDoc = reader.maxDoc();
- final int numDeletes = reader.numDeletedDocs();
- final int numDocs = maxDoc - numDeletes;
- assert reader.getLiveDocs() != null || numDeletes == 0;
- if (numDeletes == 0) {
+ if (!reader.hasDeletions()) {
return new NoDelDocMap(maxDoc);
- } else if (numDeletes < numDocs) {
- return buildDelCountDocmap(maxDoc, numDeletes, reader.getLiveDocs(), PackedInts.COMPACT);
- } else {
- return buildDirectDocMap(maxDoc, numDocs, reader.getLiveDocs(), PackedInts.COMPACT);
}
+ final Bits liveDocs = reader.getLiveDocs();
+ return build(maxDoc, liveDocs);
}
- static DocMap buildDelCountDocmap(int maxDoc, int numDeletes, Bits liveDocs, float acceptableOverheadRatio) {
- PackedInts.Mutable numDeletesSoFar = PackedInts.getMutable(maxDoc,
- PackedInts.bitsRequired(numDeletes), acceptableOverheadRatio);
+ static DocMap build(final int maxDoc, final Bits liveDocs) {
+ assert liveDocs != null;
+ final MonotonicAppendingLongBuffer docMap = new MonotonicAppendingLongBuffer();
int del = 0;
for (int i = 0; i < maxDoc; ++i) {
+ docMap.add(i - del);
if (!liveDocs.get(i)) {
++del;
}
- numDeletesSoFar.set(i, del);
}
- assert del == numDeletes : "del=" + del + ", numdeletes=" + numDeletes;
- return new DelCountDocMap(liveDocs, numDeletesSoFar);
- }
-
- static DocMap buildDirectDocMap(int maxDoc, int numDocs, Bits liveDocs, float acceptableOverheadRatio) {
- PackedInts.Mutable docIds = PackedInts.getMutable(maxDoc,
- PackedInts.bitsRequired(Math.max(0, numDocs - 1)), acceptableOverheadRatio);
- int del = 0;
- for (int i = 0; i < maxDoc; ++i) {
- if (liveDocs.get(i)) {
- docIds.set(i, i - del);
- } else {
- ++del;
+ final int numDeletedDocs = del;
+ assert docMap.size() == maxDoc;
+ return new DocMap() {
+
+ @Override
+ public int get(int docID) {
+ if (!liveDocs.get(docID)) {
+ return -1;
+ }
+ return (int) docMap.get(docID);
}
- }
- assert numDocs + del == maxDoc : "maxDoc=" + maxDoc + ", del=" + del + ", numDocs=" + numDocs;
- return new DirectDocMap(liveDocs, docIds, del);
- }
-
- /** Returns the mapped docID corresponding to the provided one. */
- public int get(int docId) {
- if (liveDocs == null || liveDocs.get(docId)) {
- return remap(docId);
- } else {
- return -1;
- }
- }
-
- /** Returns the mapped docID corresponding to the provided one. */
- public abstract int remap(int docId);
- /** Returns the total number of documents, ignoring
- * deletions. */
- public abstract int maxDoc();
-
- /** Returns the number of not-deleted documents. */
- public final int numDocs() {
- return maxDoc() - numDeletedDocs();
- }
+ @Override
+ public int maxDoc() {
+ return maxDoc;
+ }
- /** Returns the number of deleted documents. */
- public abstract int numDeletedDocs();
+ @Override
+ public int numDeletedDocs() {
+ return numDeletedDocs;
+ }
- /** Returns true if there are any deletions. */
- public boolean hasDeletions() {
- return numDeletedDocs() > 0;
+ };
}
}
@@ -122,13 +109,12 @@ public class MergeState {
private final int maxDoc;
private NoDelDocMap(int maxDoc) {
- super(null);
this.maxDoc = maxDoc;
}
@Override
- public int remap(int docId) {
- return docId;
+ public int get(int docID) {
+ return docID;
}
@Override
@@ -142,59 +128,6 @@ public class MergeState {
}
}
- private static class DirectDocMap extends DocMap {
-
- private final PackedInts.Mutable docIds;
- private final int numDeletedDocs;
-
- private DirectDocMap(Bits liveDocs, PackedInts.Mutable docIds, int numDeletedDocs) {
- super(liveDocs);
- this.docIds = docIds;
- this.numDeletedDocs = numDeletedDocs;
- }
-
- @Override
- public int remap(int docId) {
- return (int) docIds.get(docId);
- }
-
- @Override
- public int maxDoc() {
- return docIds.size();
- }
-
- @Override
- public int numDeletedDocs() {
- return numDeletedDocs;
- }
- }
-
- private static class DelCountDocMap extends DocMap {
-
- private final PackedInts.Mutable numDeletesSoFar;
-
- private DelCountDocMap(Bits liveDocs, PackedInts.Mutable numDeletesSoFar) {
- super(liveDocs);
- this.numDeletesSoFar = numDeletesSoFar;
- }
-
- @Override
- public int remap(int docId) {
- return docId - (int) numDeletesSoFar.get(docId);
- }
-
- @Override
- public int maxDoc() {
- return numDeletesSoFar.size();
- }
-
- @Override
- public int numDeletedDocs() {
- final int maxDoc = maxDoc();
- return (int) numDeletesSoFar.get(maxDoc - 1);
- }
- }
-
/** {@link SegmentInfo} of the newly merged segment. */
public SegmentInfo segmentInfo;
@@ -213,13 +146,13 @@ public class MergeState {
/** Holds the CheckAbort instance, which is invoked
* periodically to see if the merge has been aborted. */
public CheckAbort checkAbort;
-
+
/** InfoStream for debugging messages. */
public InfoStream infoStream;
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
// but is this really so expensive to compute again in different components, versus once in SM?
-
+
/** {@link SegmentReader}s that have identical field
* name/number mapping, so their stored fields and term
* vectors may be bulk merged. */
@@ -231,7 +164,7 @@ public class MergeState {
/** Sole constructor. */
MergeState() {
}
-
+
/**
* Class for recording units of work when merging segments.
*/
@@ -261,7 +194,7 @@ public class MergeState {
workCount = 0;
}
}
-
+
/** If you use this: IW.close(false) cannot abort your merge!
* @lucene.internal */
static final MergeState.CheckAbort NONE = new MergeState.CheckAbort(null, null) {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=1448861&r1=1448860&r2=1448861&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java Thu Feb 21 23:57:07 2013
@@ -154,27 +154,33 @@ public class TestSegmentMerger extends L
}
public void testBuildDocMap() {
- final int maxDoc = 128;
+ final int maxDoc = _TestUtil.nextInt(random(), 1, 128);
+ final int numDocs = _TestUtil.nextInt(random(), 0, maxDoc);
+ final int numDeletedDocs = maxDoc - numDocs;
final FixedBitSet liveDocs = new FixedBitSet(maxDoc);
-
- MergeState.DocMap docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT);
- MergeState.DocMap docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, 0, liveDocs, PackedInts.COMPACT);
- assertTrue(equals(docMap1, docMap2));
-
- liveDocs.set(1);
- for (int i = 7; i < 79; ++i) {
- liveDocs.set(i);
+ for (int i = 0; i < numDocs; ++i) {
+ while (true) {
+ final int docID = random().nextInt(maxDoc);
+ if (!liveDocs.get(docID)) {
+ liveDocs.set(docID);
+ break;
+ }
+ }
}
- liveDocs.set(80);
- liveDocs.set(88);
- int numDocs = liveDocs.cardinality();
- docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc - numDocs, liveDocs, PackedInts.COMPACT);
- docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, numDocs, liveDocs, PackedInts.COMPACT);
- assertTrue(equals(docMap1, docMap2));
- liveDocs.set(0, maxDoc);
- docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, 0, liveDocs, PackedInts.COMPACT);
- docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT);
- assertTrue(equals(docMap1, docMap2));
+ final MergeState.DocMap docMap = MergeState.DocMap.build(maxDoc, liveDocs);
+
+ assertEquals(maxDoc, docMap.maxDoc());
+ assertEquals(numDocs, docMap.numDocs());
+ assertEquals(numDeletedDocs, docMap.numDeletedDocs());
+ // assert the mapping is compact
+ for (int i = 0, del = 0; i < maxDoc; ++i) {
+ if (!liveDocs.get(i)) {
+ assertEquals(-1, docMap.get(i));
+ ++del;
+ } else {
+ assertEquals(i - del, docMap.get(i));
+ }
+ }
}
}