You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/09 02:04:13 UTC
svn commit: r1068718 [5/21] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/contrib/ant/
dev-tools/maven/lucene/contrib/db/bdb-je...
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java Wed Feb 9 01:03:49 2011
@@ -31,6 +31,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
@@ -47,6 +48,7 @@ import org.apache.lucene.store.LockObtai
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.ThreadInterruptedException;
+import org.apache.lucene.util.MapBackedSet;
/**
An <code>IndexWriter</code> creates and maintains an index.
@@ -214,7 +216,6 @@ public class IndexWriter implements Clos
private long lastCommitChangeCount; // last changeCount that was committed
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
- private HashMap<SegmentInfo,Integer> rollbackSegments;
volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
volatile long pendingCommitChangeCount;
@@ -250,7 +251,7 @@ public class IndexWriter implements Clos
private final AtomicInteger flushDeletesCount = new AtomicInteger();
final ReaderPool readerPool = new ReaderPool();
- final BufferedDeletes bufferedDeletes;
+ final BufferedDeletesStream bufferedDeletesStream;
// This is a "write once" variable (like the organic dye
// on a DVD-R that may or may not be heated by a laser and
@@ -273,6 +274,10 @@ public class IndexWriter implements Clos
// for testing
boolean anyNonBulkMerges;
+ IndexReader getReader() throws IOException {
+ return getReader(true);
+ }
+
/**
* Expert: returns a readonly reader, covering all
* committed as well as un-committed changes to the index.
@@ -332,9 +337,9 @@ public class IndexWriter implements Clos
*
* @throws IOException
*/
- IndexReader getReader() throws IOException {
+ IndexReader getReader(boolean applyAllDeletes) throws IOException {
ensureOpen();
-
+
final long tStart = System.currentTimeMillis();
if (infoStream != null) {
@@ -351,8 +356,8 @@ public class IndexWriter implements Clos
// just like we do when loading segments_N
IndexReader r;
synchronized(this) {
- flush(false, true);
- r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs);
+ flush(false, applyAllDeletes);
+ r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes);
if (infoStream != null) {
message("return reader version=" + r.getVersion() + " reader=" + r);
}
@@ -365,6 +370,13 @@ public class IndexWriter implements Clos
return r;
}
+ // Used for all SegmentReaders we open
+ private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());
+
+ Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
+ return readerFinishedListeners;
+ }
+
/** Holds shared SegmentReader instances. IndexWriter uses
* SegmentReaders for 1) applying deletes, 2) doing
* merges, 3) handing out a real-time reader. This pool
@@ -574,6 +586,7 @@ public class IndexWriter implements Clos
// synchronized
// Returns a ref, which we xfer to readerMap:
sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
+ sr.readerFinishedListeners = readerFinishedListeners;
if (info.dir == directory) {
// Only pool if reader is not external
@@ -673,7 +686,7 @@ public class IndexWriter implements Clos
* according <code>conf.getOpenMode()</code>.
* @param conf
* the configuration settings according to which IndexWriter should
- * be initalized.
+ * be initialized.
* @throws CorruptIndexException
* if the index is corrupt
* @throws LockObtainFailedException
@@ -698,8 +711,8 @@ public class IndexWriter implements Clos
mergedSegmentWarmer = conf.getMergedSegmentWarmer();
codecs = conf.getCodecProvider();
- bufferedDeletes = new BufferedDeletes(messageID);
- bufferedDeletes.setInfoStream(infoStream);
+ bufferedDeletesStream = new BufferedDeletesStream(messageID);
+ bufferedDeletesStream.setInfoStream(infoStream);
poolReaders = conf.getReaderPooling();
OpenMode mode = conf.getOpenMode();
@@ -764,7 +777,7 @@ public class IndexWriter implements Clos
setRollbackSegmentInfos(segmentInfos);
- docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes);
+ docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletesStream);
docWriter.setInfoStream(infoStream);
// Default deleter (for backwards compatibility) is
@@ -851,10 +864,6 @@ public class IndexWriter implements Clos
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
- rollbackSegments = new HashMap<SegmentInfo,Integer>();
- final int size = rollbackSegmentInfos.size();
- for(int i=0;i<size;i++)
- rollbackSegments.put(rollbackSegmentInfos.info(i), Integer.valueOf(i));
}
/**
@@ -916,7 +925,7 @@ public class IndexWriter implements Clos
this.infoStream = infoStream;
docWriter.setInfoStream(infoStream);
deleter.setInfoStream(infoStream);
- bufferedDeletes.setInfoStream(infoStream);
+ bufferedDeletesStream.setInfoStream(infoStream);
if (infoStream != null)
messageState();
}
@@ -1162,7 +1171,7 @@ public class IndexWriter implements Clos
public synchronized boolean hasDeletions() throws IOException {
ensureOpen();
- if (bufferedDeletes.any()) {
+ if (bufferedDeletesStream.any()) {
return true;
}
if (docWriter.anyDeletions()) {
@@ -1511,6 +1520,11 @@ public class IndexWriter implements Clos
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
*
+ * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with <tt>false</tt>, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
+ *
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @see MergePolicy#findMergesForOptimize
@@ -1660,6 +1674,11 @@ public class IndexWriter implements Clos
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
+ *
+ * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with <tt>false</tt>, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
*/
public void expungeDeletes(boolean doWait)
throws CorruptIndexException, IOException {
@@ -1810,6 +1829,18 @@ public class IndexWriter implements Clos
}
}
+ /** Expert: to be used by a {@link MergePolicy} to avoid
+ * selecting merges for segments already being merged.
+ * The returned collection is not cloned, and thus is
+ * only safe to access if you hold IndexWriter's lock
+ * (which you do when IndexWriter invokes the
+ * MergePolicy).
+ *
+ * <p>Do not alter the returned collection! */
+ public synchronized Collection<SegmentInfo> getMergingSegments() {
+ return mergingSegments;
+ }
+
/** Expert: the {@link MergeScheduler} calls this method
* to retrieve the next merge requested by the
* MergePolicy */
@@ -1867,7 +1898,7 @@ public class IndexWriter implements Clos
mergePolicy.close();
mergeScheduler.close();
- bufferedDeletes.clear();
+ bufferedDeletesStream.clear();
synchronized(this) {
@@ -1930,8 +1961,9 @@ public class IndexWriter implements Clos
*
* <p>NOTE: this method will forcefully abort all merges
* in progress. If other threads are running {@link
- * #optimize()} or any of the addIndexes methods, they
- * will receive {@link MergePolicy.MergeAbortedException}s.
+ * #optimize()}, {@link #addIndexes(IndexReader[])} or
+ * {@link #expungeDeletes} methods, they may receive
+ * {@link MergePolicy.MergeAbortedException}s.
*/
public synchronized void deleteAll() throws IOException {
try {
@@ -2211,6 +2243,11 @@ public class IndexWriter implements Clos
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
*
+ * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with <tt>false</tt>, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
+ *
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
@@ -2438,13 +2475,13 @@ public class IndexWriter implements Clos
}
/**
- * Flush all in-memory buffered udpates (adds and deletes)
+ * Flush all in-memory buffered updates (adds and deletes)
* to the Directory.
* @param triggerMerge if true, we may merge segments (if
* deletes or docs were flushed) if necessary
- * @param flushDeletes whether pending deletes should also
+ * @param applyAllDeletes whether pending deletes should also be applied
*/
- protected final void flush(boolean triggerMerge, boolean flushDeletes) throws CorruptIndexException, IOException {
+ protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
// NOTE: this method cannot be sync'd because
// maybeMerge() in turn calls mergeScheduler.merge which
@@ -2455,7 +2492,7 @@ public class IndexWriter implements Clos
// We can be called during close, when closing==true, so we must pass false to ensureOpen:
ensureOpen(false);
- if (doFlush(flushDeletes) && triggerMerge) {
+ if (doFlush(applyAllDeletes) && triggerMerge) {
maybeMerge();
}
}
@@ -2504,10 +2541,10 @@ public class IndexWriter implements Clos
// tiny segments:
if (flushControl.getFlushDeletes() ||
(config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
- bufferedDeletes.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
+ bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
applyAllDeletes = true;
if (infoStream != null) {
- message("force apply deletes bytesUsed=" + bufferedDeletes.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
+ message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
}
}
}
@@ -2517,12 +2554,15 @@ public class IndexWriter implements Clos
message("apply all deletes during flush");
}
flushDeletesCount.incrementAndGet();
- if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, segmentInfos)) {
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos);
+ if (result.anyDeletes) {
checkpoint();
}
+ bufferedDeletesStream.prune(segmentInfos);
+ assert !bufferedDeletesStream.any();
flushControl.clearDeletes();
} else if (infoStream != null) {
- message("don't apply deletes now delTermCount=" + bufferedDeletes.numTerms() + " bytesUsed=" + bufferedDeletes.bytesUsed());
+ message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
}
doAfterFlush();
@@ -2548,7 +2588,7 @@ public class IndexWriter implements Clos
*/
public final long ramSizeInBytes() {
ensureOpen();
- return docWriter.bytesUsed() + bufferedDeletes.bytesUsed();
+ return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
}
/** Expert: Return the number of documents currently
@@ -2558,28 +2598,12 @@ public class IndexWriter implements Clos
return docWriter.getNumDocs();
}
- private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
-
- int first = segmentInfos.indexOf(merge.segments.info(0));
- if (first == -1)
- throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current index " + segString(), directory);
-
- final int numSegments = segmentInfos.size();
-
- final int numSegmentsToMerge = merge.segments.size();
- for(int i=0;i<numSegmentsToMerge;i++) {
- final SegmentInfo info = merge.segments.info(i);
-
- if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
- if (segmentInfos.indexOf(info) == -1)
- throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
- else
- throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
- directory);
+ private void ensureValidMerge(MergePolicy.OneMerge merge) {
+ for(SegmentInfo info : merge.segments) {
+ if (segmentInfos.indexOf(info) == -1) {
+ throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
}
}
-
- return first;
}
/** Carefully merges deletes for the segments we just
@@ -2604,9 +2628,11 @@ public class IndexWriter implements Clos
// started merging:
int docUpto = 0;
int delCount = 0;
+ long minGen = Long.MAX_VALUE;
for(int i=0; i < sourceSegments.size(); i++) {
SegmentInfo info = sourceSegments.info(i);
+ minGen = Math.min(info.getBufferedDeletesGen(), minGen);
int docCount = info.docCount;
SegmentReader previousReader = merge.readersClone[i];
final Bits prevDelDocs = previousReader.getDeletedDocs();
@@ -2657,9 +2683,17 @@ public class IndexWriter implements Clos
assert mergedReader.numDeletedDocs() == delCount;
mergedReader.hasChanges = delCount > 0;
+
+ // If new deletes were applied while we were merging
+ // (which happens if eg commit() or getReader() is
+ // called during our merge), then it better be the case
+ // that the delGen has increased for all our merged
+ // segments:
+ assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();
+
+ mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
}
- /* FIXME if we want to support non-contiguous segment merges */
synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
assert testPoint("startCommitMerge");
@@ -2685,7 +2719,7 @@ public class IndexWriter implements Clos
return false;
}
- final int start = ensureContiguousMerge(merge);
+ ensureValidMerge(merge);
commitMergedDeletes(merge, mergedReader);
@@ -2695,10 +2729,32 @@ public class IndexWriter implements Clos
// format as well:
setMergeDocStoreIsCompoundFile(merge);
- segmentInfos.subList(start, start + merge.segments.size()).clear();
assert !segmentInfos.contains(merge.info);
- segmentInfos.add(start, merge.info);
-
+
+ final Set mergedAway = new HashSet<SegmentInfo>(merge.segments);
+ int segIdx = 0;
+ int newSegIdx = 0;
+ boolean inserted = false;
+ final int curSegCount = segmentInfos.size();
+ while(segIdx < curSegCount) {
+ final SegmentInfo info = segmentInfos.info(segIdx++);
+ if (mergedAway.contains(info)) {
+ if (!inserted) {
+ segmentInfos.set(segIdx-1, merge.info);
+ inserted = true;
+ newSegIdx++;
+ }
+ } else {
+ segmentInfos.set(newSegIdx++, info);
+ }
+ }
+ assert newSegIdx == curSegCount - merge.segments.size() + 1;
+ segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
+
+ if (infoStream != null) {
+ message("after commit: " + segString());
+ }
+
closeMergeReaders(merge, false);
// Must note the change to segmentInfos so any commits
@@ -2710,16 +2766,12 @@ public class IndexWriter implements Clos
// disk, updating SegmentInfo, etc.:
readerPool.clear(merge.segments);
- // remove pending deletes of the segments
- // that were merged, moving them onto the segment just
- // before the merged segment
- // Lock order: IW -> BD
- bufferedDeletes.commitMerge(merge);
-
if (merge.optimize) {
// cascade the optimize:
segmentsToOptimize.add(merge.info);
}
+
+
return true;
}
@@ -2847,7 +2899,7 @@ public class IndexWriter implements Clos
}
}
- ensureContiguousMerge(merge);
+ ensureValidMerge(merge);
pendingMerges.add(merge);
@@ -2874,10 +2926,6 @@ public class IndexWriter implements Clos
final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
boolean success = false;
try {
- // Lock order: IW -> BD
- if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, merge.segments)) {
- checkpoint();
- }
_mergeInit(merge);
success = true;
} finally {
@@ -2901,6 +2949,9 @@ public class IndexWriter implements Clos
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
}
+ // TODO: is there any perf benefit to sorting
+ // merged segments? eg biggest to smallest?
+
if (merge.info != null)
// mergeInit already done
return;
@@ -2913,6 +2964,17 @@ public class IndexWriter implements Clos
// names.
merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
+ // Lock order: IW -> BD
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
+ if (result.anyDeletes) {
+ checkpoint();
+ }
+
+ merge.info.setBufferedDeletesGen(result.gen);
+
+ // Lock order: IW -> BD
+ bufferedDeletesStream.prune(segmentInfos);
+
Map<String,String> details = new HashMap<String,String>();
details.put("optimize", Boolean.toString(merge.optimize));
details.put("mergeFactor", Integer.toString(merge.segments.size()));
@@ -3476,7 +3538,7 @@ public class IndexWriter implements Clos
}
synchronized boolean nrtIsCurrent(SegmentInfos infos) {
- return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletes.any();
+ return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
}
synchronized boolean isClosed() {
@@ -3643,7 +3705,7 @@ public class IndexWriter implements Clos
final double ramBufferSizeMB = config.getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
final long limit = (long) (ramBufferSizeMB*1024*1024);
- long used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+ long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {
// DocumentsWriter may be able to free up some
@@ -3651,7 +3713,7 @@ public class IndexWriter implements Clos
// Lock order: FC -> DW
docWriter.balanceRAM();
- used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+ used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {
return setFlushPending("ram full: " + reason, false);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Wed Feb 9 01:03:49 2011
@@ -552,10 +552,13 @@ public final class IndexWriterConfig imp
/** Sets the termsIndexDivisor passed to any readers that
* IndexWriter opens, for example when applying deletes
* or creating a near-real-time reader in {@link
- * IndexWriter#getReader}. */
+ * IndexWriter#getReader}. If you pass -1, the terms index
+ * won't be loaded by the readers. This is only useful in
+ * advanced situations when you will only .next() through
+ * all terms; attempts to seek will hit an exception. */
public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
- if (divisor <= 0) {
- throw new IllegalArgumentException("divisor must be >= 1 (got " + divisor + ")");
+ if (divisor <= 0 && divisor != -1) {
+ throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
}
readerTermsIndexDivisor = divisor;
return this;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Wed Feb 9 01:03:49 2011
@@ -18,6 +18,11 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
import java.util.Set;
/** <p>This class implements a {@link MergePolicy} that tries
@@ -67,6 +72,7 @@ public abstract class LogMergePolicy ext
// out there wrote his own LMP ...
protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+ protected boolean requireContiguousMerge = false;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@@ -105,6 +111,21 @@ public abstract class LogMergePolicy ext
writer.get().message("LMP: " + message);
}
+ /** If true, merges must be in-order slice of the
+ * segments. If false, then the merge policy is free to
+ * pick any segments. The default is false, which is
+ * in general more efficient than true since it gives the
+ * merge policy more freedom to pick closely sized
+ * segments. */
+ public void setRequireContiguousMerge(boolean v) {
+ requireContiguousMerge = v;
+ }
+
+ /** See {@link #setRequireContiguousMerge}. */
+ public boolean getRequireContiguousMerge() {
+ return requireContiguousMerge;
+ }
+
/** <p>Returns the number of segments that are merged at
* once and also controls the total number of segments
* allowed to accumulate in the index.</p> */
@@ -356,6 +377,8 @@ public abstract class LogMergePolicy ext
}
return null;
}
+
+ // TODO: handle non-contiguous merge case differently?
// Find the newest (rightmost) segment that needs to
// be optimized (other segments may have been flushed
@@ -454,6 +477,36 @@ public abstract class LogMergePolicy ext
return spec;
}
+ private static class SegmentInfoAndLevel implements Comparable<SegmentInfoAndLevel> {
+ SegmentInfo info;
+ float level;
+ int index;
+
+ public SegmentInfoAndLevel(SegmentInfo info, float level, int index) {
+ this.info = info;
+ this.level = level;
+ this.index = index;
+ }
+
+ // Sorts largest to smallest
+ public int compareTo(SegmentInfoAndLevel other) {
+ if (level < other.level)
+ return 1;
+ else if (level > other.level)
+ return -1;
+ else
+ return 0;
+ }
+ }
+
+ private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
+ public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
+ return o1.index - o2.index;
+ }
+ }
+
+ private static final SortByIndex sortByIndex = new SortByIndex();
+
/** Checks if any merges are now necessary and returns a
* {@link MergePolicy.MergeSpecification} if so. A merge
* is necessary when there are more than {@link
@@ -470,18 +523,37 @@ public abstract class LogMergePolicy ext
// Compute levels, which is just log (base mergeFactor)
// of the size of each segment
- float[] levels = new float[numSegments];
+ final List<SegmentInfoAndLevel> levels = new ArrayList<SegmentInfoAndLevel>();
final float norm = (float) Math.log(mergeFactor);
+ final Collection<SegmentInfo> mergingSegments = writer.get().getMergingSegments();
+
for(int i=0;i<numSegments;i++) {
final SegmentInfo info = infos.info(i);
long size = size(info);
+ // When we require contiguous merge, we still add the
+ // segment to levels to avoid merging "across" a set
+ // of segment being merged:
+ if (!requireContiguousMerge && mergingSegments.contains(info)) {
+ if (verbose()) {
+ message("seg " + info.name + " already being merged; skip");
+ }
+ continue;
+ }
+
// Floor tiny segments
- if (size < 1)
+ if (size < 1) {
size = 1;
- levels[i] = (float) Math.log(size)/norm;
- message("seg " + info.name + " level=" + levels[i]);
+ }
+ levels.add(new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i));
+ if (verbose()) {
+ message("seg " + info.name + " level=" + levels.get(i).level + " size=" + size);
+ }
+ }
+
+ if (!requireContiguousMerge) {
+ Collections.sort(levels);
}
final float levelFloor;
@@ -499,14 +571,16 @@ public abstract class LogMergePolicy ext
MergeSpecification spec = null;
+ final int numMergeableSegments = levels.size();
+
int start = 0;
- while(start < numSegments) {
+ while(start < numMergeableSegments) {
// Find max level of all segments not already
// quantized.
- float maxLevel = levels[start];
- for(int i=1+start;i<numSegments;i++) {
- final float level = levels[i];
+ float maxLevel = levels.get(start).level;
+ for(int i=1+start;i<numMergeableSegments;i++) {
+ final float level = levels.get(i).level;
if (level > maxLevel)
maxLevel = level;
}
@@ -525,9 +599,9 @@ public abstract class LogMergePolicy ext
levelBottom = levelFloor;
}
- int upto = numSegments-1;
+ int upto = numMergeableSegments-1;
while(upto >= start) {
- if (levels[upto] >= levelBottom) {
+ if (levels.get(upto).level >= levelBottom) {
break;
}
upto--;
@@ -540,18 +614,26 @@ public abstract class LogMergePolicy ext
while(end <= 1+upto) {
boolean anyTooLarge = false;
for(int i=start;i<end;i++) {
- final SegmentInfo info = infos.info(i);
+ final SegmentInfo info = levels.get(i).info;
anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
}
if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
- if (verbose())
+ if (verbose()) {
message(" " + start + " to " + end + ": add this merge");
- spec.add(new OneMerge(infos.range(start, end)));
- } else if (verbose())
+ }
+ Collections.sort(levels.subList(start, end), sortByIndex);
+ final SegmentInfos mergeInfos = new SegmentInfos();
+ for(int i=start;i<end;i++) {
+ mergeInfos.add(levels.get(i).info);
+ assert infos.contains(levels.get(i).info);
+ }
+ spec.add(new OneMerge(mergeInfos));
+ } else if (verbose()) {
message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
+ }
start = end;
end = start + mergeFactor;
@@ -598,7 +680,8 @@ public abstract class LogMergePolicy ext
sb.append("maxMergeSizeForOptimize=").append(maxMergeSizeForOptimize).append(", ");
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
- sb.append("useCompoundFile=").append(useCompoundFile);
+ sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+ sb.append("requireContiguousMerge=").append(requireContiguousMerge);
sb.append("]");
return sb.toString();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java Wed Feb 9 01:03:49 2011
@@ -20,13 +20,14 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.MapBackedSet;
/** An IndexReader which reads multiple indexes, appending
* their content. */
@@ -82,6 +83,7 @@ public class MultiReader extends IndexRe
}
}
starts[subReaders.length] = maxDoc;
+ readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
return ReaderUtil.buildReaderContext(this);
}
@@ -345,11 +347,6 @@ public class MultiReader extends IndexRe
subReaders[i].close();
}
}
-
- // NOTE: only needed in case someone had asked for
- // FieldCache for top-level reader (which is generally
- // not a good idea):
- FieldCache.DEFAULT.purge(this);
}
@Override
@@ -386,7 +383,24 @@ public class MultiReader extends IndexRe
return subReaders;
}
+ @Override
public ReaderContext getTopReaderContext() {
return topLevelContext;
}
+
+ @Override
+ public void addReaderFinishedListener(ReaderFinishedListener listener) {
+ super.addReaderFinishedListener(listener);
+ for(IndexReader sub : subReaders) {
+ sub.addReaderFinishedListener(listener);
+ }
+ }
+
+ @Override
+ public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+ super.removeReaderFinishedListener(listener);
+ for(IndexReader sub : subReaders) {
+ sub.removeReaderFinishedListener(listener);
+ }
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java Wed Feb 9 01:03:49 2011
@@ -23,7 +23,7 @@ import java.io.IOException;
* A {@link MergeScheduler} which never executes any merges. It is also a
* singleton and can be accessed through {@link NoMergeScheduler#INSTANCE}. Use
* it if you want to prevent an {@link IndexWriter} from ever executing merges,
- * irregardles of the {@link MergePolicy} used. Note that you can achieve the
+ * regardless of the {@link MergePolicy} used. Note that you can achieve the
* same thing by using {@link NoMergePolicy}, however with
* {@link NoMergeScheduler} you also ensure that no unnecessary code of any
* {@link MergeScheduler} implementation is ever executed. Hence it is
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ParallelReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ParallelReader.java Wed Feb 9 01:03:49 2011
@@ -22,11 +22,12 @@ import org.apache.lucene.document.FieldS
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MapBackedSet;
import java.io.IOException;
import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
/** An IndexReader which reads multiple, parallel indexes. Each index added
@@ -73,6 +74,7 @@ public class ParallelReader extends Inde
public ParallelReader(boolean closeSubReaders) throws IOException {
super();
this.incRefReaders = !closeSubReaders;
+ readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
}
/** {@inheritDoc} */
@@ -529,8 +531,6 @@ public class ParallelReader extends Inde
readers.get(i).close();
}
}
-
- FieldCache.DEFAULT.purge(this);
}
@Override
@@ -548,6 +548,21 @@ public class ParallelReader extends Inde
return topLevelReaderContext;
}
+ @Override
+ public void addReaderFinishedListener(ReaderFinishedListener listener) {
+ super.addReaderFinishedListener(listener);
+ for (IndexReader reader : readers) {
+ reader.addReaderFinishedListener(listener);
+ }
+ }
+
+ @Override
+ public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+ super.removeReaderFinishedListener(listener);
+ for (IndexReader reader : readers) {
+ reader.removeReaderFinishedListener(listener);
+ }
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Payload.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Payload.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Payload.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Payload.java Wed Feb 9 01:03:49 2011
@@ -17,8 +17,6 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.Serializable;
-
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.ArrayUtil;
@@ -34,7 +32,7 @@ import org.apache.lucene.util.ArrayUtil;
* to retrieve the payloads from the index.<br>
*
*/
-public class Payload implements Serializable, Cloneable {
+public class Payload implements Cloneable {
/** the byte array containing the payload data */
protected byte[] data;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java Wed Feb 9 01:03:49 2011
@@ -24,7 +24,7 @@ import org.apache.lucene.util.BytesRef;
/**
* Provides a {@link DirPayloadProcessor} to be used for a {@link Directory}.
- * This allows using differnt {@link DirPayloadProcessor}s for different
+ * This allows using different {@link DirPayloadProcessor}s for different
* directories, for e.g. to perform different processing of payloads of
* different directories.
* <p>
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java Wed Feb 9 01:03:49 2011
@@ -204,6 +204,7 @@ final class PerFieldCodecWrapper extends
}
}
+ @Override
public FieldsProducer fieldsProducer(SegmentReadState state)
throws IOException {
return new FieldsReader(state.dir, state.fieldInfos, state.segmentInfo,
@@ -213,7 +214,7 @@ final class PerFieldCodecWrapper extends
@Override
public void files(Directory dir, SegmentInfo info, String codecId, Set<String> files)
throws IOException {
- // ignore codecid sicne segmentCodec will assign it per codec
+ // ignore codecid since segmentCodec will assign it per codec
segmentCodecs.files(dir, info, files);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java Wed Feb 9 01:03:49 2011
@@ -103,7 +103,7 @@ public class PersistentSnapshotDeletionP
* @param mode
* specifies whether a new index should be created, deleting all
* existing snapshots information (immediately), or open an existing
- * index, initializing the class with the snapsthots information.
+ * index, initializing the class with the snapshots information.
* @param matchVersion
* specifies the {@link Version} that should be used when opening the
* IndexWriter.
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Wed Feb 9 01:03:49 2011
@@ -66,11 +66,11 @@ public final class SegmentInfo {
private boolean isCompoundFile;
- private List<String> files; // cached list of files that this segment uses
+ private volatile List<String> files; // cached list of files that this segment uses
// in the Directory
- private long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand)
- private long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand)
+ private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand)
+ private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand)
private int docStoreOffset; // if this segment shares stored fields & vectors, this
// offset is where in that file this segment's docs begin
@@ -94,6 +94,10 @@ public final class SegmentInfo {
// specific versions afterwards ("3.0", "3.1" etc.).
// see Constants.LUCENE_MAIN_VERSION.
private String version;
+
+ // NOTE: only used in-RAM by IW to track buffered deletes;
+ // this is never written to/read from the Directory
+ private long bufferedDeletesGen;
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors) {
@@ -241,24 +245,31 @@ public final class SegmentInfo {
*/
public long sizeInBytes(boolean includeDocStores) throws IOException {
if (includeDocStores) {
- if (sizeInBytesWithStore != -1) return sizeInBytesWithStore;
- sizeInBytesWithStore = 0;
+ if (sizeInBytesWithStore != -1) {
+ return sizeInBytesWithStore;
+ }
+ long sum = 0;
for (final String fileName : files()) {
- // We don't count bytes used by a shared doc store against this segment
+ // We don't count bytes used by a shared doc store
+ // against this segment
if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName)) {
- sizeInBytesWithStore += dir.fileLength(fileName);
+ sum += dir.fileLength(fileName);
}
}
+ sizeInBytesWithStore = sum;
return sizeInBytesWithStore;
} else {
- if (sizeInBytesNoStore != -1) return sizeInBytesNoStore;
- sizeInBytesNoStore = 0;
+ if (sizeInBytesNoStore != -1) {
+ return sizeInBytesNoStore;
+ }
+ long sum = 0;
for (final String fileName : files()) {
if (IndexFileNames.isDocStoreFile(fileName)) {
continue;
}
- sizeInBytesNoStore += dir.fileLength(fileName);
+ sum += dir.fileLength(fileName);
}
+ sizeInBytesNoStore = sum;
return sizeInBytesNoStore;
}
}
@@ -672,5 +683,12 @@ public final class SegmentInfo {
public String getVersion() {
return version;
}
-
+
+ long getBufferedDeletesGen() {
+ return bufferedDeletesGen;
+ }
+
+ void setBufferedDeletesGen(long v) {
+ bufferedDeletesGen = v;
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Wed Feb 9 01:03:49 2011
@@ -266,7 +266,7 @@ final class SegmentMerger {
// details.
throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption");
- segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo);
+ segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null);
return docCount;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java Wed Feb 9 01:03:49 2011
@@ -38,7 +38,6 @@ import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.index.codecs.FieldsProducer;
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.BytesRef;
/**
@@ -183,13 +182,9 @@ public class SegmentReader extends Index
storeCFSReader.close();
}
- // Force FieldCache to evict our entries at this
- // point. If the exception occurred while
- // initializing the core readers, then
- // origInstance will be null, and we don't want
- // to call FieldCache.purge (it leads to NPE):
+ // Now, notify any ReaderFinished listeners:
if (origInstance != null) {
- FieldCache.DEFAULT.purge(origInstance);
+ origInstance.notifyReaderFinishedListeners();
}
}
}
@@ -633,6 +628,7 @@ public class SegmentReader extends Index
clone.si = si;
clone.readBufferSize = readBufferSize;
clone.pendingDeleteCount = pendingDeleteCount;
+ clone.readerFinishedListeners = readerFinishedListeners;
if (!openReadOnly && hasChanges) {
// My pending changes transfer to the new reader
@@ -1203,4 +1199,14 @@ public class SegmentReader extends Index
public int getTermInfosIndexDivisor() {
return core.termsIndexDivisor;
}
+
+ @Override
+ protected void readerFinished() {
+ // Do nothing here -- we have more careful control on
+ // when to notify that a SegmentReader has finished,
+ // because a given core is shared across many cloned
+ // SegmentReaders. We only notify once that core is no
+ // longer used (all SegmentReaders sharing it have been
+ // closed).
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Wed Feb 9 01:03:49 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.PrintStream;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BitVector;
/**
* @lucene.experimental
@@ -32,6 +33,16 @@ public class SegmentWriteState {
public final int numDocs;
public boolean hasVectors;
+ // Deletes to apply while we are flushing the segment. A
+ // Term is enrolled in here if it was deleted at one
+ // point, and it's mapped to the docIDUpto, meaning any
+ // docID < docIDUpto containing this term should be
+ // deleted.
+ public final BufferedDeletes segDeletes;
+
+ // Lazily created:
+ public BitVector deletedDocs;
+
final SegmentCodecs segmentCodecs;
public final String codecId;
@@ -57,8 +68,9 @@ public class SegmentWriteState {
public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
- int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs) {
+ int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) {
this.infoStream = infoStream;
+ this.segDeletes = segDeletes;
this.directory = directory;
this.segmentName = segmentName;
this.fieldInfos = fieldInfos;
@@ -80,5 +92,6 @@ public class SegmentWriteState {
termIndexInterval = state.termIndexInterval;
segmentCodecs = state.segmentCodecs;
this.codecId = codecId;
+ segDeletes = state.segDeletes;
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Term.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Term.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Term.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Term.java Wed Feb 9 01:03:49 2011
@@ -30,7 +30,7 @@ import org.apache.lucene.util.StringHelp
Note that terms may represent more than words from text fields, but also
things like dates, email addresses, urls, etc. */
-public final class Term implements Comparable<Term>, java.io.Serializable {
+public final class Term implements Comparable<Term> {
String field;
BytesRef bytes;
@@ -199,11 +199,4 @@ public final class Term implements Compa
@Override
public final String toString() { return field + ":" + bytes.utf8ToString(); }
-
- private void readObject(java.io.ObjectInputStream in)
- throws java.io.IOException, ClassNotFoundException
- {
- in.defaultReadObject();
- field = StringHelper.intern(field);
- }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorOffsetInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorOffsetInfo.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorOffsetInfo.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorOffsetInfo.java Wed Feb 9 01:03:49 2011
@@ -1,7 +1,5 @@
package org.apache.lucene.index;
-import java.io.Serializable;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -24,7 +22,7 @@ import java.io.Serializable;
* offset information. This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the
* original content).
*/
-public class TermVectorOffsetInfo implements Serializable {
+public class TermVectorOffsetInfo {
/**
* Convenience declaration when creating a {@link org.apache.lucene.index.TermPositionVector} that stores only position information.
*/
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java Wed Feb 9 01:03:49 2011
@@ -281,6 +281,7 @@ final class TermVectorsTermsWriterPerFie
int[] lastOffsets; // Last offset we saw
int[] lastPositions; // Last position where this term occurred
+ @Override
ParallelPostingsArray newInstance(int size) {
return new TermVectorsPostingsArray(size);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java Wed Feb 9 01:03:49 2011
@@ -16,6 +16,7 @@ package org.apache.lucene.index.codecs;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.TermState;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Wed Feb 9 01:03:49 2011
@@ -109,7 +109,7 @@ public class BlockTermsReader extends Fi
}
}
- private String segment;
+ //private String segment;
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize,
Comparator<BytesRef> termComp, int termsCacheSize, String codecId)
@@ -119,7 +119,7 @@ public class BlockTermsReader extends Fi
termsCache = new DoubleBarrelLRUCache<FieldAndTerm,BlockTermState>(termsCacheSize);
this.termComp = termComp;
- this.segment = segment;
+ //this.segment = segment;
in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, BlockTermsWriter.TERMS_EXTENSION),
readBufferSize);
@@ -654,6 +654,7 @@ public class BlockTermsReader extends Fi
return SeekStatus.FOUND;
}
+ @Override
public long ord() {
if (!doOrd) {
throw new UnsupportedOperationException();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java Wed Feb 9 01:03:49 2011
@@ -44,7 +44,7 @@ public class FixedGapTermsIndexReader ex
// number of places to multiply out the actual ord, and we
// will overflow int during those multiplies. So to avoid
// having to upgrade each multiple to long in multiple
- // places (error proned), we use long here:
+ // places (error prone), we use long here:
private long totalIndexInterval;
private int indexDivisor;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Wed Feb 9 01:03:49 2011
@@ -540,7 +540,7 @@ public class PreFlexFields extends Field
// We can easily detect S in UTF8: if a byte has
// prefix 11110 (0xf0), then that byte and the
// following 3 bytes encode a single unicode codepoint
- // in S. Similary,we can detect E: if a byte has
+ // in S. Similarly, we can detect E: if a byte has
// prefix 1110111 (0xee), then that byte and the
// following 2 bytes encode a single unicode codepoint
// in E.
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java Wed Feb 9 01:03:49 2011
@@ -45,7 +45,7 @@ public final class SegmentTermEnum imple
// whenever you add a new format, make it 1 smaller (negative version logic)!
public static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
- // when removing support for old versions, levae the last supported version here
+ // when removing support for old versions, leave the last supported version here
public static final int FORMAT_MINIMUM = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
private TermBuffer termBuffer = new TermBuffer();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java Wed Feb 9 01:03:49 2011
@@ -58,6 +58,7 @@ extends SegmentTermDocs {
this.proxStreamOrig = proxStream; // the proxStream will be cloned lazily when nextPosition() is called for the first time
}
+ @Override
final void seek(TermInfo ti, Term term) throws IOException {
super.seek(ti, term);
if (ti != null)
@@ -69,6 +70,7 @@ extends SegmentTermDocs {
needToLoadPayload = false;
}
+ @Override
public final void close() throws IOException {
super.close();
if (proxStream != null) proxStream.close();
@@ -100,11 +102,13 @@ extends SegmentTermDocs {
return delta;
}
+ @Override
protected final void skippingDoc() throws IOException {
// we remember to skip a document lazily
lazySkipProxCount += freq;
}
+ @Override
public final boolean next() throws IOException {
// we remember to skip the remaining positions of the current
// document lazily
@@ -118,12 +122,8 @@ extends SegmentTermDocs {
return false;
}
- public final int read(final int[] docs, final int[] freqs) {
- throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
- }
-
-
/** Called by super.skipTo(). */
+ @Override
protected void skipProx(long proxPointer, int payloadLength) throws IOException {
// we save the pointer, we might have to skip there lazily
lazySkipPointer = proxPointer;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java Wed Feb 9 01:03:49 2011
@@ -67,15 +67,18 @@ public final class TermInfosReader {
this.term = t;
}
+ @Override
public boolean equals(Object other) {
CloneableTerm t = (CloneableTerm) other;
return this.term.equals(t.term);
}
+ @Override
public int hashCode() {
return term.hashCode();
}
+ @Override
public Object clone() {
return new CloneableTerm(term);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java Wed Feb 9 01:03:49 2011
@@ -55,6 +55,7 @@ public abstract class IntIndexInput impl
public abstract void set(Index other);
+ @Override
public abstract Object clone();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Wed Feb 9 01:03:49 2011
@@ -161,6 +161,7 @@ public class SepPostingsReaderImpl exten
return other;
}
+ @Override
public void copyFrom(TermState _other) {
super.copyFrom(_other);
SepTermState other = (SepTermState) _other;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Wed Feb 9 01:03:49 2011
@@ -130,6 +130,7 @@ class SimpleTextFieldsReader extends Fie
fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
}
+ @Override
public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
//System.out.println("seek to text=" + text.utf8ToString());
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Wed Feb 9 01:03:49 2011
@@ -102,12 +102,14 @@ public class StandardPostingsReader exte
ByteArrayDataInput bytesReader;
byte[] bytes;
+ @Override
public Object clone() {
StandardTermState other = new StandardTermState();
other.copyFrom(this);
return other;
}
+ @Override
public void copyFrom(TermState _other) {
super.copyFrom(_other);
StandardTermState other = (StandardTermState) _other;
@@ -121,6 +123,7 @@ public class StandardPostingsReader exte
// (rare!), they will be re-read from disk.
}
+ @Override
public String toString() {
return super.toString() + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " skipOffset=" + skipOffset;
}
@@ -367,13 +370,10 @@ public class StandardPostingsReader exte
@Override
public int advance(int target) throws IOException {
- // TODO: jump right to next() if target is < X away
- // from where we are now?
-
- if (limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipInterval) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close.
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
@@ -520,13 +520,10 @@ public class StandardPostingsReader exte
//System.out.println("StandardR.D&PE advance target=" + target);
- // TODO: jump right to next() if target is < X away
- // from where we are now?
-
- if (limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipInterval) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
@@ -567,6 +564,7 @@ public class StandardPostingsReader exte
return doc;
}
+ @Override
public int nextPosition() throws IOException {
if (lazyProxPointer != -1) {
@@ -595,10 +593,12 @@ public class StandardPostingsReader exte
/** Returns the payload at this position, or null if no
* payload was indexed. */
+ @Override
public BytesRef getPayload() throws IOException {
throw new IOException("No payloads exist for this field!");
}
+ @Override
public boolean hasPayload() {
return false;
}
@@ -716,13 +716,11 @@ public class StandardPostingsReader exte
public int advance(int target) throws IOException {
//System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
- // TODO: jump right to next() if target is < X away
- // from where we are now?
- if (limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipInterval) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
@@ -765,6 +763,7 @@ public class StandardPostingsReader exte
return doc;
}
+ @Override
public int nextPosition() throws IOException {
if (lazyProxPointer != -1) {
@@ -825,6 +824,7 @@ public class StandardPostingsReader exte
/** Returns the payload at this position, or null if no
* payload was indexed. */
+ @Override
public BytesRef getPayload() throws IOException {
assert lazyProxPointer == -1;
assert posPendingCount < freq;
@@ -842,6 +842,7 @@ public class StandardPostingsReader exte
return payload;
}
+ @Override
public boolean hasPayload() {
return payloadPending && payloadLength > 0;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/Message.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/Message.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/Message.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/Message.java Wed Feb 9 01:03:49 2011
@@ -17,14 +17,13 @@ package org.apache.lucene.messages;
* limitations under the License.
*/
-import java.io.Serializable;
import java.util.Locale;
/**
* Message Interface for a lazy loading.
* For Native Language Support (NLS), system of software internationalization.
*/
-public interface Message extends Serializable {
+public interface Message {
public String getKey();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/MessageImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/MessageImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/MessageImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/messages/MessageImpl.java Wed Feb 9 01:03:49 2011
@@ -25,8 +25,6 @@ import java.util.Locale;
*/
public class MessageImpl implements Message {
- private static final long serialVersionUID = -3077643314630884523L;
-
private String key;
private Object[] arguments = new Object[0];