You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/01/28 11:43:52 UTC
svn commit: r1237038 - in
/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene: codecs/
codecs/lucene40/values/ index/
Author: mikemccand
Date: Sat Jan 28 10:43:52 2012
New Revision: 1237038
URL: http://svn.apache.org/viewvc?rev=1237038&view=rev
Log:
drop 100% deleted segments before merging
Modified:
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/MergeState.java
lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java?rev=1237038&r1=1237037&r2=1237038&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java Sat Jan 28 10:43:52 2012
@@ -78,6 +78,7 @@ public final class MappingMultiDocsEnum
current = subs[upto].docsEnum;
currentBase = mergeState.docBase[reader];
currentMap = mergeState.docMaps[reader];
+ assert currentMap == null || currentMap.length == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.length + " vs " + subs[upto].slice.length;
}
}
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java?rev=1237038&r1=1237037&r2=1237038&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java Sat Jan 28 10:43:52 2012
@@ -109,7 +109,7 @@ public class Floats {
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type);
arrayTemplate = DocValuesArray.TEMPLATES.get(type);
- assert size == 4 || size == 8;
+ assert size == 4 || size == 8: "wrong size=" + size + " type=" + type + " id=" + id;
}
@Override
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java?rev=1237038&r1=1237037&r2=1237038&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java Sat Jan 28 10:43:52 2012
@@ -206,7 +206,7 @@ class BufferedDeletesStream {
delIDX--;
} else if (packet != null && segGen == packet.delGen()) {
- assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet";
+ assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen;
//System.out.println(" eq");
// Lock order: IW -> BD -> RP
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1237038&r1=1237037&r2=1237038&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/IndexWriter.java Sat Jan 28 10:43:52 2012
@@ -3036,10 +3036,17 @@ public class IndexWriter implements Clos
// newly flushed deletes but mapping them to the new
// docIDs.
+ // Since we copy-on-write, if any new deletes were
+ // applied after merging has started, we can just
+ // check if the before/after liveDocs have changed.
+ // If so, we must carefully merge the liveDocs one
+ // doc at a time:
if (currentLiveDocs != prevLiveDocs) {
+
// This means this segment received new deletes
// since we started the merge, so we
// must merge them:
+ final int startDocUpto = docUpto;
for(int j=0;j<docCount;j++) {
if (!prevLiveDocs.get(j)) {
assert !currentLiveDocs.get(j);
@@ -3055,13 +3062,9 @@ public class IndexWriter implements Clos
}
}
} else {
- final int readerDocCount;
- if (i == sourceSegments.size()-1) {
- readerDocCount = mergeState.mergedDocCount - mergeState.docBase[i];
- } else {
- readerDocCount = mergeState.docBase[i+1] - mergeState.docBase[i];
- }
- docUpto += readerDocCount;
+ assert mergeState.readers != null;
+ assert mergeState.segmentDocCounts != null;
+ docUpto += mergeState.segmentDocCounts.get(info);
}
} else if (currentLiveDocs != null) {
// This segment had no deletes before but now it
@@ -3600,10 +3603,8 @@ public class IndexWriter implements Clos
}
}
}
-
merge.readerLiveDocs.add(liveDocs);
merge.readers.add(reader);
-
merger.add(reader, liveDocs);
segUpto++;
}
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/MergeState.java?rev=1237038&r1=1237037&r2=1237038&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/MergeState.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/MergeState.java Sat Jan 28 10:43:52 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import java.util.List;
+import java.util.Map;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
@@ -41,10 +42,11 @@ public class MergeState {
}
public FieldInfos fieldInfos;
- public List<IndexReaderAndLiveDocs> readers; // Readers & liveDocs being merged
- public int[][] docMaps; // Maps docIDs around deletions
- public int[] docBase; // New docID base per reader
- public int mergedDocCount; // Total # merged docs
+ public List<IndexReaderAndLiveDocs> readers; // Readers & liveDocs being merged
+ public int[][] docMaps; // Maps docIDs around deletions
+ public int[] docBase; // New docID base per reader
+ public Map<SegmentInfo,Integer> segmentDocCounts; // Non-deleted docCount per reader
+ public int mergedDocCount; // Total # merged docs
public CheckAbort checkAbort;
public InfoStream infoStream;
Modified: lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1237038&r1=1237037&r2=1237038&view=diff
==============================================================================
--- lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/lucene3661/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Sat Jan 28 10:43:52 2012
@@ -104,12 +104,7 @@ final class SegmentMerger {
// IndexWriter.close(false) takes to actually stop the
// threads.
- final int numReaders = mergeState.readers.size();
- // Remap docIDs
- mergeState.docMaps = new int[numReaders][];
- mergeState.docBase = new int[numReaders];
- mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders];
- mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
+ setDocMaps();
mergeFieldInfos();
setMatchingSegmentReaders();
@@ -283,37 +278,44 @@ final class SegmentMerger {
}
}
- private final void mergeTerms(SegmentWriteState segmentWriteState) throws CorruptIndexException, IOException {
- int docBase = 0;
-
- final List<Fields> fields = new ArrayList<Fields>();
- final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
+ private int[] shrink(int[] in, int size) {
+ final int[] newArray = new int[size];
+ System.arraycopy(in, 0, newArray, 0, size);
+ return newArray;
+ }
- for(MergeState.IndexReaderAndLiveDocs r : mergeState.readers) {
- final Fields f = r.reader.fields();
- final int maxDoc = r.reader.maxDoc();
- if (f != null) {
- slices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size()));
- fields.add(f);
- }
- docBase += maxDoc;
- }
+ private int[][] shrink(int[][] in, int size) {
+ final int[][] newArray = new int[size][];
+ System.arraycopy(in, 0, newArray, 0, size);
+ return newArray;
+ }
+ // NOTE: removes any "all deleted" readers from mergeState.readers
+ private void setDocMaps() throws IOException {
final int numReaders = mergeState.readers.size();
- docBase = 0;
+ // Remap docIDs
+ mergeState.docMaps = new int[numReaders][];
+ mergeState.docBase = new int[numReaders];
+ mergeState.segmentDocCounts = new HashMap<SegmentInfo,Integer>();
+ mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders];
+ mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
+
+ int docBase = 0;
- for(int i=0;i<numReaders;i++) {
+ int i = 0;
+ while(i < mergeState.readers.size()) {
final MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(i);
mergeState.docBase[i] = docBase;
final int maxDoc = reader.reader.maxDoc();
- if (reader.liveDocs != null) {
+ final int docCount;
+ final Bits liveDocs = reader.liveDocs;
+ final int[] docMap;
+ if (liveDocs != null) {
int delCount = 0;
- final Bits liveDocs = reader.liveDocs;
- assert liveDocs != null;
- final int[] docMap = mergeState.docMaps[i] = new int[maxDoc];
+ docMap = new int[maxDoc];
int newDocID = 0;
for(int j=0;j<maxDoc;j++) {
if (!liveDocs.get(j)) {
@@ -323,14 +325,56 @@ final class SegmentMerger {
docMap[j] = newDocID++;
}
}
- docBase += maxDoc - delCount;
+ docCount = maxDoc - delCount;
} else {
- docBase += maxDoc;
+ docCount = maxDoc;
+ docMap = null;
}
+ if (reader.reader instanceof SegmentReader) {
+ mergeState.segmentDocCounts.put(((SegmentReader) reader.reader).getSegmentInfo(), docCount);
+ }
+
+ if (docCount == 0) {
+ // Skip this reader (all docs are deleted):
+ mergeState.readers.remove(i);
+ continue;
+ }
+
+ mergeState.docMaps[i] = docMap;
+ docBase += docCount;
+
if (mergeState.payloadProcessorProvider != null) {
mergeState.dirPayloadProcessor[i] = mergeState.payloadProcessorProvider.getDirProcessor(reader.reader.directory());
}
+
+ i++;
+ }
+
+ final int numReadersLeft = mergeState.readers.size();
+
+ if (numReadersLeft < mergeState.docMaps.length) {
+ mergeState.docMaps = shrink(mergeState.docMaps, numReadersLeft);
+ mergeState.docBase = shrink(mergeState.docBase, numReadersLeft);
+ }
+ }
+
+ private final void mergeTerms(SegmentWriteState segmentWriteState) throws CorruptIndexException, IOException {
+
+ final List<Fields> fields = new ArrayList<Fields>();
+ final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
+
+ int docBase = 0;
+
+ for(int readerIndex=0;readerIndex<mergeState.readers.size();readerIndex++) {
+ final MergeState.IndexReaderAndLiveDocs r = mergeState.readers.get(readerIndex);
+ final Fields f = r.reader.fields();
+ final int maxDoc = r.reader.maxDoc();
+ if (f != null) {
+ slices.add(new ReaderUtil.Slice(docBase, maxDoc, readerIndex));
+ fields.add(f);
+ }
+ docBase += maxDoc;
}
final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState);