Posted to commits@lucene.apache.org by bu...@apache.org on 2011/02/22 02:01:11 UTC
svn commit: r1073192 [8/32] - in /lucene/dev/branches/realtime_search: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/demo/
dev-tools/idea/lucene/contrib/highlighter/ dev-tools/idea/lucene/c...
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java Tue Feb 22 01:00:39 2011
@@ -17,6 +17,16 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
@@ -24,9 +34,6 @@ import org.apache.lucene.store.IndexInpu
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.StringHelper;
-import java.io.IOException;
-import java.util.*;
-
/** Access to the Fieldable Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Fieldable Info file. Objects
* of this class are thread-safe for multiple readers, but only one thread can
@@ -34,7 +41,72 @@ import java.util.*;
* accessing this object.
* @lucene.experimental
*/
-public final class FieldInfos {
+public final class FieldInfos implements Iterable<FieldInfo> {
+ private static final class FieldNumberBiMap {
+ private final Map<Integer,String> numberToName;
+ private final Map<String,Integer> nameToNumber;
+
+ private FieldNumberBiMap() {
+ this.nameToNumber = new HashMap<String, Integer>();
+ this.numberToName = new HashMap<Integer, String>();
+ }
+
+ synchronized int addOrGet(String fieldName, FieldInfoBiMap fieldInfoMap, int preferredFieldNumber) {
+ Integer fieldNumber = nameToNumber.get(fieldName);
+ if (fieldNumber == null) {
+ if (!numberToName.containsKey(preferredFieldNumber)) {
+ // cool - we can use this number globally
+ fieldNumber = preferredFieldNumber;
+ } else {
+ fieldNumber = findNextAvailableFieldNumber(preferredFieldNumber + 1, numberToName.keySet());
+ }
+
+ numberToName.put(fieldNumber, fieldName);
+ nameToNumber.put(fieldName, fieldNumber);
+ }
+
+ return fieldNumber;
+ }
+
+ synchronized void setIfNotSet(int fieldNumber, String fieldName) {
+ if (!numberToName.containsKey(fieldNumber) && !nameToNumber.containsKey(fieldName)) {
+ numberToName.put(fieldNumber, fieldName);
+ nameToNumber.put(fieldName, fieldNumber);
+ }
+ }
+ }
+
+ private static final class FieldInfoBiMap implements Iterable<FieldInfo> {
+ private final SortedMap<Integer,FieldInfo> byNumber = new TreeMap<Integer,FieldInfo>();
+ private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
+ private int nextAvailableNumber = 0;
+
+ public void put(FieldInfo fi) {
+ assert !byNumber.containsKey(fi.number);
+ assert !byName.containsKey(fi.name);
+
+ byNumber.put(fi.number, fi);
+ byName.put(fi.name, fi);
+ }
+
+ public FieldInfo get(String fieldName) {
+ return byName.get(fieldName);
+ }
+
+ public FieldInfo get(int fieldNumber) {
+ return byNumber.get(fieldNumber);
+ }
+
+ public int size() {
+ assert byNumber.size() == byName.size();
+ return byNumber.size();
+ }
+
+ @Override
+ public Iterator<FieldInfo> iterator() {
+ return byNumber.values().iterator();
+ }
+ }
// First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2;
@@ -53,11 +125,18 @@ public final class FieldInfos {
static final byte STORE_PAYLOADS = 0x20;
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
- private final ArrayList<FieldInfo> byNumber = new ArrayList<FieldInfo>();
- private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
+ private final FieldNumberBiMap globalFieldNumbers;
+ private final FieldInfoBiMap localFieldInfos;
+
private int format;
public FieldInfos() {
+ this(new FieldNumberBiMap());
+ }
+
+ private FieldInfos(FieldNumberBiMap globalFieldNumbers) {
+ this.globalFieldNumbers = globalFieldNumbers;
+ this.localFieldInfos = new FieldInfoBiMap();
}
/**
@@ -68,6 +147,7 @@ public final class FieldInfos {
* @throws IOException
*/
public FieldInfos(Directory d, String name) throws IOException {
+ this(new FieldNumberBiMap());
IndexInput input = d.openInput(name);
try {
read(input, name);
@@ -76,17 +156,27 @@ public final class FieldInfos {
}
}
+ private static final int findNextAvailableFieldNumber(int nextPreferredNumber, Set<Integer> unavailableNumbers) {
+ while (unavailableNumbers.contains(nextPreferredNumber)) {
+ nextPreferredNumber++;
+ }
+
+ return nextPreferredNumber;
+ }
+
+ public FieldInfos newFieldInfosWithGlobalFieldNumberMap() {
+ return new FieldInfos(this.globalFieldNumbers);
+ }
+
/**
* Returns a deep clone of this FieldInfos instance.
*/
@Override
synchronized public Object clone() {
- FieldInfos fis = new FieldInfos();
- final int numField = byNumber.size();
- for(int i=0;i<numField;i++) {
- FieldInfo fi = (FieldInfo) ( byNumber.get(i)).clone();
- fis.byNumber.add(fi);
- fis.byName.put(fi.name, fi);
+ FieldInfos fis = new FieldInfos(globalFieldNumbers);
+ for (FieldInfo fi : this) {
+ FieldInfo clone = (FieldInfo) (fi).clone();
+ fis.localFieldInfos.put(clone);
}
return fis;
}
@@ -102,9 +192,7 @@ public final class FieldInfos {
/** Returns true if any fields do not omitTermFreqAndPositions */
public boolean hasProx() {
- final int numFields = byNumber.size();
- for(int i=0;i<numFields;i++) {
- final FieldInfo fi = fieldInfo(i);
+ for (FieldInfo fi : this) {
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
return true;
}
@@ -215,9 +303,28 @@ public final class FieldInfos {
synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+ return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
+ storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ }
+
+ synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
+ boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
+ boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+
FieldInfo fi = fieldInfo(name);
if (fi == null) {
- return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ if (preferredFieldNumber == -1) {
+ preferredFieldNumber = findNextAvailableFieldNumber(localFieldInfos.nextAvailableNumber, localFieldInfos.byNumber.keySet());
+ localFieldInfos.nextAvailableNumber = preferredFieldNumber;
+ }
+
+ // get a global number for this field
+ int fieldNumber = globalFieldNumbers.addOrGet(name, localFieldInfos, preferredFieldNumber);
+ if (localFieldInfos.get(fieldNumber) != null) {
+ // fall back if the global number is already taken
+ fieldNumber = preferredFieldNumber;
+ }
+ return addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
} else {
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
}
@@ -225,27 +332,27 @@ public final class FieldInfos {
}
synchronized public FieldInfo add(FieldInfo fi) {
- return add(fi.name, fi.isIndexed, fi.storeTermVector,
+ int preferredFieldNumber = fi.number;
+ FieldInfo other = localFieldInfos.get(preferredFieldNumber);
+ if (other == null || !other.name.equals(fi.name)) {
+ preferredFieldNumber = -1;
+ }
+ return addOrUpdateInternal(fi.name, preferredFieldNumber, fi.isIndexed, fi.storeTermVector,
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
fi.omitNorms, fi.storePayloads,
fi.omitTermFreqAndPositions);
}
- synchronized public void update(FieldInfos otherInfos) {
- int numFields = otherInfos.size();
- for (int i = 0; i < numFields; i++) {
- add(otherInfos.fieldInfo(i));
- }
- }
-
- private FieldInfo addInternal(String name, boolean isIndexed,
+ private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
name = StringHelper.intern(name);
- FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
+ globalFieldNumbers.setIfNotSet(fieldNumber, name);
+ FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
- byNumber.add(fi);
- byName.put(name, fi);
+
+ assert localFieldInfos.get(fi.number) == null;
+ localFieldInfos.put(fi);
return fi;
}
@@ -255,7 +362,7 @@ public final class FieldInfos {
}
public FieldInfo fieldInfo(String fieldName) {
- return byName.get(fieldName);
+ return localFieldInfos.get(fieldName);
}
/**
@@ -277,16 +384,20 @@ public final class FieldInfos {
* doesn't exist.
*/
public FieldInfo fieldInfo(int fieldNumber) {
- return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
+ return (fieldNumber >= 0) ? localFieldInfos.get(fieldNumber) : null;
+ }
+
+ public Iterator<FieldInfo> iterator() {
+ return localFieldInfos.iterator();
}
public int size() {
- return byNumber.size();
+ return localFieldInfos.size();
}
public boolean hasVectors() {
- for (int i = 0; i < size(); i++) {
- if (fieldInfo(i).storeTermVector) {
+ for (FieldInfo fi : this) {
+ if (fi.storeTermVector) {
return true;
}
}
@@ -294,8 +405,8 @@ public final class FieldInfos {
}
public boolean hasNorms() {
- for (int i = 0; i < size(); i++) {
- if (!fieldInfo(i).omitNorms) {
+ for (FieldInfo fi : this) {
+ if (!fi.omitNorms) {
return true;
}
}
@@ -314,8 +425,7 @@ public final class FieldInfos {
public void write(IndexOutput output) throws IOException {
output.writeVInt(FORMAT_CURRENT);
output.writeVInt(size());
- for (int i = 0; i < size(); i++) {
- FieldInfo fi = fieldInfo(i);
+ for (FieldInfo fi : this) {
byte bits = 0x0;
if (fi.isIndexed) bits |= IS_INDEXED;
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
@@ -325,7 +435,8 @@ public final class FieldInfos {
if (fi.storePayloads) bits |= STORE_PAYLOADS;
if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
output.writeString(fi.name);
- output.writeInt(fi.codecId);
+ output.writeInt(fi.number);
+ output.writeInt(fi.getCodecId());
output.writeByte(bits);
}
}
@@ -345,6 +456,7 @@ public final class FieldInfos {
for (int i = 0; i < size; i++) {
String name = StringHelper.intern(input.readString());
// if this is a previous format codec 0 will be preflex!
+ final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC? input.readInt():i;
final int codecId = format <= FORMAT_PER_FIELD_CODEC? input.readInt():0;
byte bits = input.readByte();
boolean isIndexed = (bits & IS_INDEXED) != 0;
@@ -354,8 +466,8 @@ public final class FieldInfos {
boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
- final FieldInfo addInternal = addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
- addInternal.codecId = codecId;
+ final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ addInternal.setCodecId(codecId);
}
if (input.getFilePointer() != input.length()) {
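
For illustration only (not part of this commit): the FieldNumberBiMap hunks above assign one global number per field name, reusing the caller's preferred number when it is still free and otherwise scanning upward for the next unused one. A minimal standalone sketch of that assignment logic, with a hypothetical class name:

import java.util.HashMap;
import java.util.Map;

// Simplified, hypothetical version of the addOrGet / findNextAvailableFieldNumber
// logic shown in the FieldInfos hunks above.
class GlobalFieldNumbersSketch {
  private final Map<String,Integer> nameToNumber = new HashMap<String,Integer>();
  private final Map<Integer,String> numberToName = new HashMap<Integer,String>();

  // Returns the single global number for fieldName, assigning one if needed:
  // use the preferred number when free, else take the next unclaimed number.
  synchronized int addOrGet(String fieldName, int preferredFieldNumber) {
    Integer fieldNumber = nameToNumber.get(fieldName);
    if (fieldNumber == null) {
      int candidate = preferredFieldNumber;
      while (numberToName.containsKey(candidate)) {
        candidate++;
      }
      fieldNumber = candidate;
      numberToName.put(fieldNumber, fieldName);
      nameToNumber.put(fieldName, fieldNumber);
    }
    return fieldNumber;
  }
}
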
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java Tue Feb 22 01:00:39 2011
@@ -44,14 +44,12 @@ final class FieldsWriter {
// If null - we were supplied with streams, if not null - we manage them ourselves
private Directory directory;
private String segment;
- private FieldInfos fieldInfos;
private IndexOutput fieldsStream;
private IndexOutput indexStream;
- FieldsWriter(Directory directory, String segment, FieldInfos fn) throws IOException {
+ FieldsWriter(Directory directory, String segment) throws IOException {
this.directory = directory;
this.segment = segment;
- fieldInfos = fn;
boolean success = false;
try {
@@ -69,10 +67,9 @@ final class FieldsWriter {
}
}
- FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
+ FieldsWriter(IndexOutput fdx, IndexOutput fdt) {
directory = null;
segment = null;
- fieldInfos = fn;
fieldsStream = fdt;
indexStream = fdx;
}
@@ -164,7 +161,7 @@ final class FieldsWriter {
assert fieldsStream.getFilePointer() == position;
}
- final void addDocument(Document doc) throws IOException {
+ final void addDocument(Document doc, FieldInfos fieldInfos) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
int storedCount = 0;
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Tue Feb 22 01:00:39 2011
@@ -22,13 +22,14 @@ import org.apache.lucene.document.FieldS
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MapBackedSet;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import java.util.Comparator;
+import java.util.concurrent.ConcurrentHashMap;
/** A <code>FilterIndexReader</code> contains another IndexReader, which it
* uses as its basic source of data, possibly transforming the data along the
@@ -286,6 +287,7 @@ public class FilterIndexReader extends I
public FilterIndexReader(IndexReader in) {
super();
this.in = in;
+ readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
}
@Override
@@ -391,11 +393,6 @@ public class FilterIndexReader extends I
@Override
protected void doClose() throws IOException {
in.close();
-
- // NOTE: only needed in case someone had asked for
- // FieldCache for top-level reader (which is generally
- // not a good idea):
- FieldCache.DEFAULT.purge(this);
}
@@ -454,4 +451,16 @@ public class FilterIndexReader extends I
buffer.append(')');
return buffer.toString();
}
-}
\ No newline at end of file
+
+ @Override
+ public void addReaderFinishedListener(ReaderFinishedListener listener) {
+ super.addReaderFinishedListener(listener);
+ in.addReaderFinishedListener(listener);
+ }
+
+ @Override
+ public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+ super.removeReaderFinishedListener(listener);
+ in.removeReaderFinishedListener(listener);
+ }
+}
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Tue Feb 22 01:00:39 2011
@@ -79,7 +79,7 @@ final class FreqProxTermsWriter extends
// If this field has postings then add them to the
// segment
- fieldWriter.flush(consumer, state);
+ fieldWriter.flush(fieldInfo.name, consumer, state);
TermsHashPerField perField = fieldWriter.termsHashPerField;
assert termsHash == null || termsHash == perField.termsHash;
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Tue Feb 22 01:00:39 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Comparator;
+import java.util.Map;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Fieldable;
@@ -26,6 +27,7 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.index.codecs.TermsConsumer;
+import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
@@ -200,6 +202,7 @@ final class FreqProxTermsWriterPerField
return new FreqProxPostingsArray(size);
}
+ @Override
void copyTo(ParallelPostingsArray toArray, int numToCopy) {
assert toArray instanceof FreqProxPostingsArray;
FreqProxPostingsArray to = (FreqProxPostingsArray) toArray;
@@ -225,14 +228,23 @@ final class FreqProxTermsWriterPerField
/* Walk through all unique text tokens (Posting
* instances) found in this field and serialize them
* into a single RAM segment. */
- void flush(FieldsConsumer consumer, final SegmentWriteState state)
+ void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state)
throws CorruptIndexException, IOException {
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
final Comparator<BytesRef> termComp = termsConsumer.getComparator();
+ final Term protoTerm = new Term(fieldName);
+
final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+ final Map<Term,Integer> segDeletes;
+ if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+ segDeletes = state.segDeletes.terms;
+ } else {
+ segDeletes = null;
+ }
+
final int[] termIDs = termsHashPerField.sortPostings(termComp);
final int numTerms = termsHashPerField.bytesHash.size();
final BytesRef text = new BytesRef();
@@ -260,6 +272,18 @@ final class FreqProxTermsWriterPerField
final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
+ final int delDocLimit;
+ if (segDeletes != null) {
+ final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text));
+ if (docIDUpto != null) {
+ delDocLimit = docIDUpto;
+ } else {
+ delDocLimit = 0;
+ }
+ } else {
+ delDocLimit = 0;
+ }
+
// Now termStates has numToMerge FieldMergeStates
// which all share the same term. Now we must
// interleave the docID streams.
@@ -300,7 +324,28 @@ final class FreqProxTermsWriterPerField
numDocs++;
assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs;
final int termDocFreq = termFreq;
+
+ // NOTE: we could check here if the docID was
+ // deleted, and skip it. However, this is somewhat
+ // dangerous because it can yield non-deterministic
+ // behavior since we may see the docID before we see
+ // the term that caused it to be deleted. This
+ // would mean some (but not all) of its postings may
+ // make it into the index, which'd alter the docFreq
+ // for those terms. We could fix this by doing two
+ // passes, ie first sweep marks all del docs, and
+ // 2nd sweep does the real flush, but I suspect
+ // that'd add too much time to flush.
postingsConsumer.startDoc(docID, termDocFreq);
+ if (docID < delDocLimit) {
+ // Mark it deleted. TODO: we could also skip
+ // writing its postings; this would be
+ // deterministic (just for this Term's docs).
+ if (state.deletedDocs == null) {
+ state.deletedDocs = new BitVector(state.numDocs);
+ }
+ state.deletedDocs.set(docID);
+ }
// Carefully copy over the prox + payload info,
// changing the format to match Lucene's segment
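
For illustration only (not part of this commit): the hunk above looks up, per term, the docIDUpto recorded when a delete-by-term was buffered, and marks every posting whose docID is below that limit as deleted while the segment flushes; the postings themselves are still written so docFreq stays deterministic. A rough standalone sketch of that check, using java.util.BitSet in place of Lucene's BitVector (class and method names hypothetical):

import java.util.BitSet;

class DeleteOnFlushSketch {
  // postingDocIDs: docIDs for one term, in order; docIDUpto: the buffered
  // delete's limit for that term, or null if the term has no buffered delete.
  static BitSet markDeleted(int[] postingDocIDs, Integer docIDUpto, int maxDoc) {
    final int delDocLimit = docIDUpto == null ? 0 : docIDUpto;
    final BitSet deletedDocs = new BitSet(maxDoc);
    for (int docID : postingDocIDs) {
      if (docID < delDocLimit) {
        deletedDocs.set(docID);  // marked deleted, but its postings are still flushed
      }
    }
    return deletedDocs;
  }
}
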
Added: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java?rev=1073192&view=auto
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (added)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java Tue Feb 22 01:00:39 2011
@@ -0,0 +1,145 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
+
+/** Holds buffered deletes by term or query, once pushed.
+ * Pushed deletes are write-once, so we shift to a more
+ * memory-efficient data structure to hold them. We don't
+ * hold docIDs because these are applied on flush. */
+
+class FrozenBufferedDeletes {
+
+ /* Rough logic: Term is object w/
+ String field and String text (OBJ_HEADER + 2*POINTER).
+ We don't count Term's field since it's interned.
+ Term's text is String (OBJ_HEADER + 4*INT + POINTER +
+ OBJ_HEADER + string.length*CHAR). */
+ final static int BYTES_PER_DEL_TERM = 3*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 3*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 4*RamUsageEstimator.NUM_BYTES_INT;
+
+ /* Query we often undercount (say 24 bytes), plus int. */
+ final static int BYTES_PER_DEL_QUERY = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + 24;
+
+ // Terms, in sorted order:
+ // TODO: would be more RAM efficient to store BytesRef[],
+ // per field:
+ final Term[] terms;
+
+ // Parallel array of deleted query, and the docIDUpto for
+ // each
+ final Query[] queries;
+ final int[] queryLimits;
+ final int bytesUsed;
+ final int numTermDeletes;
+ final long gen;
+
+ public FrozenBufferedDeletes(BufferedDeletes deletes, long gen) {
+ terms = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
+ queries = new Query[deletes.queries.size()];
+ queryLimits = new int[deletes.queries.size()];
+ int upto = 0;
+ for(Map.Entry<Query,Integer> ent : deletes.queries.entrySet()) {
+ queries[upto] = ent.getKey();
+ queryLimits[upto] = ent.getValue();
+ upto++;
+ }
+ bytesUsed = terms.length * BYTES_PER_DEL_TERM + queries.length * BYTES_PER_DEL_QUERY;
+ numTermDeletes = deletes.numTermDeletes.get();
+ this.gen = gen;
+ }
+
+ public Iterable<Term> termsIterable() {
+ return new Iterable<Term>() {
+ // @Override -- not until Java 1.6
+ public Iterator<Term> iterator() {
+ return new Iterator<Term>() {
+ private int upto;
+
+ // @Override -- not until Java 1.6
+ public boolean hasNext() {
+ return upto < terms.length;
+ }
+
+ // @Override -- not until Java 1.6
+ public Term next() {
+ return terms[upto++];
+ }
+
+ // @Override -- not until Java 1.6
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ };
+ }
+
+ public Iterable<QueryAndLimit> queriesIterable() {
+ return new Iterable<QueryAndLimit>() {
+ // @Override -- not until Java 1.6
+ public Iterator<QueryAndLimit> iterator() {
+ return new Iterator<QueryAndLimit>() {
+ private int upto;
+
+ // @Override -- not until Java 1.6
+ public boolean hasNext() {
+ return upto < queries.length;
+ }
+
+ // @Override -- not until Java 1.6
+ public QueryAndLimit next() {
+ QueryAndLimit ret = new QueryAndLimit(queries[upto], queryLimits[upto]);
+ upto++;
+ return ret;
+ }
+
+ // @Override -- not until Java 1.6
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ };
+ }
+
+ @Override
+ public String toString() {
+ String s = "";
+ if (numTermDeletes != 0) {
+ s += " " + numTermDeletes + " deleted terms (unique count=" + terms.length + ")";
+ }
+ if (queries.length != 0) {
+ s += " " + queries.length + " deleted queries";
+ }
+ if (bytesUsed != 0) {
+ s += " bytesUsed=" + bytesUsed;
+ }
+
+ return s;
+ }
+
+ boolean any() {
+ return terms.length > 0 || queries.length > 0;
+ }
+}
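
For illustration only (not part of this commit): once frozen, a packet exposes its deletes solely through the two Iterable views above. A hedged usage sketch, assuming a hypothetical caller placed in the same org.apache.lucene.index package so the package-private bytesUsed field and classes are visible:

package org.apache.lucene.index;

import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;

class FrozenDeletesUsageSketch {
  static String describe(FrozenBufferedDeletes packet) {
    int termCount = 0;
    for (Term t : packet.termsIterable()) {
      termCount++;
    }
    int queryCount = 0;
    for (QueryAndLimit ql : packet.queriesIterable()) {
      queryCount++;
    }
    // bytesUsed is the rough estimate built from BYTES_PER_DEL_TERM / BYTES_PER_DEL_QUERY
    return termCount + " del terms, " + queryCount + " del queries, bytesUsed=" + packet.bytesUsed;
  }
}
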
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileNames.java Tue Feb 22 01:00:39 2011
@@ -204,7 +204,7 @@ public final class IndexFileNames {
/**
* Returns true if the given filename ends with the given extension. One
- * should provide a <i>pure</i> extension, withouth '.'.
+ * should provide a <i>pure</i> extension, without '.'.
*/
public static boolean matchesExtension(String filename, String ext) {
// It doesn't make a difference whether we allocate a StringBuilder ourself
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java Tue Feb 22 01:00:39 2011
@@ -21,7 +21,7 @@ import java.io.FileNotFoundException;
/**
* Signals that no index was found in the Directory. Possibly because the
- * directory is empty, however can slso indicate an index corruption.
+ * directory is empty, however can also indicate an index corruption.
*/
public final class IndexNotFoundException extends FileNotFoundException {
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java Tue Feb 22 01:00:39 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.Similarity;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
@@ -82,6 +83,62 @@ import java.util.concurrent.atomic.Atomi
public abstract class IndexReader implements Cloneable,Closeable {
/**
+ * A custom listener that's invoked when the IndexReader
+ * is finished.
+ *
+ * <p>For a SegmentReader, this listener is called only
+ * once all SegmentReaders sharing the same core are
+ * closed. At this point it is safe for apps to evict
+ * this reader from any caches keyed on {@link
+ * #getCoreCacheKey}. This is the same interface that
+ * {@link FieldCache} uses, internally, to evict
+ * entries.</p>
+ *
+ * <p>For other readers, this listener is called when they
+ * are closed.</p>
+ *
+ * @lucene.experimental
+ */
+ public static interface ReaderFinishedListener {
+ public void finished(IndexReader reader);
+ }
+
+ // Impls must set this if they may call add/removeReaderFinishedListener:
+ protected volatile Collection<ReaderFinishedListener> readerFinishedListeners;
+
+ /** Expert: adds a {@link ReaderFinishedListener}. The
+ * provided listener is also added to any sub-readers, if
+ * this is a composite reader. Also, any reader reopened
+ * or cloned from this one will also copy the listeners at
+ * the time of reopen.
+ *
+ * @lucene.experimental */
+ public void addReaderFinishedListener(ReaderFinishedListener listener) {
+ readerFinishedListeners.add(listener);
+ }
+
+ /** Expert: remove a previously added {@link ReaderFinishedListener}.
+ *
+ * @lucene.experimental */
+ public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+ readerFinishedListeners.remove(listener);
+ }
+
+ protected void notifyReaderFinishedListeners() {
+ // Defensive (should never be null -- all impls must set
+ // this):
+ if (readerFinishedListeners != null) {
+ for(ReaderFinishedListener listener : readerFinishedListeners) {
+ listener.finished(this);
+ }
+ }
+ }
+
+ protected void readerFinished() {
+ notifyReaderFinishedListeners();
+ }
+
+ /**
* Constants describing field properties, for example used for
* {@link IndexReader#getFieldNames(FieldOption)}.
*/
@@ -195,6 +252,7 @@ public abstract class IndexReader implem
refCount.incrementAndGet();
}
}
+ readerFinished();
}
}
@@ -238,24 +296,26 @@ public abstract class IndexReader implem
/**
* Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
*
- *
* @param writer The IndexWriter to open from
+ * @param applyAllDeletes If true, all buffered deletes will
+ * be applied (made visible) in the returned reader. If
+ * false, the deletes are not applied but remain buffered
+ * (in IndexWriter) so that they will be applied in the
+ * future. Applying deletes can be costly, so if your app
+ * can tolerate deleted documents being returned you might
+ * gain some performance by passing false.
* @return The new IndexReader
* @throws CorruptIndexException
* @throws IOException if there is a low-level IO error
*
- * @see #reopen(IndexWriter)
+ * @see #reopen(IndexWriter,boolean)
*
* @lucene.experimental
*/
- public static IndexReader open(final IndexWriter writer) throws CorruptIndexException, IOException {
- return writer.getReader();
+ public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+ return writer.getReader(applyAllDeletes);
}
-
-
-
-
/** Expert: returns an IndexReader reading the index in the given
* {@link IndexCommit}. You should pass readOnly=true, since it
* gives much better concurrent performance, unless you
@@ -358,7 +418,10 @@ public abstract class IndexReader implem
* memory. By setting this to a value > 1 you can reduce
* memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
- * to -1 to skip loading the terms index entirely.
+ * to -1 to skip loading the terms index entirely. This is only useful in
+ * advanced situations when you will only .next() through all terms;
+ * attempts to seek will hit an exception.
+ *
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
@@ -557,18 +620,26 @@ public abstract class IndexReader implem
* if you attempt to reopen any of those readers, you'll
* hit an {@link AlreadyClosedException}.</p>
*
- * @lucene.experimental
- *
* @return IndexReader that covers entire index plus all
* changes made so far by this IndexWriter instance
*
+ * @param writer The IndexWriter to open from
+ * @param applyAllDeletes If true, all buffered deletes will
+ * be applied (made visible) in the returned reader. If
+ * false, the deletes are not applied but remain buffered
+ * (in IndexWriter) so that they will be applied in the
+ * future. Applying deletes can be costly, so if your app
+ * can tolerate deleted documents being returned you might
+ * gain some performance by passing false.
+ *
* @throws IOException
+ *
+ * @lucene.experimental
*/
- public IndexReader reopen(IndexWriter writer) throws CorruptIndexException, IOException {
- return writer.getReader();
+ public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+ return writer.getReader(applyAllDeletes);
}
-
/**
* Efficiently clones the IndexReader (sharing most
* internal state).
@@ -933,8 +1004,8 @@ public abstract class IndexReader implem
/** Expert: Resets the normalization factor for the named field of the named
* document. The norm represents the product of the field's {@link
- * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
- * int) length normalization}. Thus, to preserve the length normalization
+ * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its
+ * length normalization. Thus, to preserve the length normalization
* values when resetting this, one should base the new value upon the old.
*
* <b>NOTE:</b> If this field does not store norms, then
@@ -1163,7 +1234,16 @@ public abstract class IndexReader implem
return n;
}
- /** Undeletes all documents currently marked as deleted in this index.
+ /** Undeletes all documents currently marked as deleted in
+ * this index.
+ *
+ * <p>NOTE: this method can only recover documents marked
+ * for deletion but not yet removed from the index; when
+ * and how Lucene removes deleted documents is an
+ * implementation detail, subject to change from release
+ * to release. However, you can use {@link
+ * #numDeletedDocs} on the current IndexReader instance to
+ * see how many documents will be un-deleted.
*
* @throws StaleReaderException if the index has changed
* since this reader was opened
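
For illustration only (not part of this commit): the new ReaderFinishedListener hook and the applyAllDeletes flag above can be combined by an application that keeps per-core caches keyed on getCoreCacheKey(). A hedged sketch; the cache and class name are hypothetical, while the IndexReader/IndexWriter calls are the ones added in this diff:

import java.io.IOException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;

class NrtReaderCacheSketch {
  // Hypothetical application-side cache keyed on the reader's core cache key.
  static final Map<Object,Object> PER_CORE_CACHE = new ConcurrentHashMap<Object,Object>();

  static IndexReader openNrt(IndexWriter writer) throws IOException {
    // Pass false to skip applying buffered deletes: cheaper to open, but the
    // returned reader may still see documents that were deleted in the writer.
    IndexReader reader = IndexReader.open(writer, false);
    reader.addReaderFinishedListener(new IndexReader.ReaderFinishedListener() {
      public void finished(IndexReader r) {
        // Called once the reader (or the shared segment core) is finished;
        // safe point to evict anything cached against this core.
        PER_CORE_CACHE.remove(r.getCoreCacheKey());
      }
    });
    return reader;
  }
}
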
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriter.java Tue Feb 22 01:00:39 2011
@@ -31,22 +31,24 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.ThreadInterruptedException;
+import org.apache.lucene.util.MapBackedSet;
/**
An <code>IndexWriter</code> creates and maintains an index.
@@ -214,12 +216,12 @@ public class IndexWriter implements Clos
private long lastCommitChangeCount; // last changeCount that was committed
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
- private HashMap<SegmentInfo,Integer> rollbackSegments;
volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
volatile long pendingCommitChangeCount;
final SegmentInfos segmentInfos; // the segments
+ final FieldInfos fieldInfos;
private DocumentsWriter docWriter;
final IndexFileDeleter deleter;
@@ -250,7 +252,7 @@ public class IndexWriter implements Clos
private final AtomicInteger flushDeletesCount = new AtomicInteger();
final ReaderPool readerPool = new ReaderPool();
- final BufferedDeletes bufferedDeletes;
+ final BufferedDeletesStream bufferedDeletesStream;
// This is a "write once" variable (like the organic dye
// on a DVD-R that may or may not be heated by a laser and
@@ -273,6 +275,10 @@ public class IndexWriter implements Clos
// for testing
boolean anyNonBulkMerges;
+ IndexReader getReader() throws IOException {
+ return getReader(true);
+ }
+
/**
* Expert: returns a readonly reader, covering all
* committed as well as un-committed changes to the index.
@@ -332,7 +338,7 @@ public class IndexWriter implements Clos
*
* @throws IOException
*/
- IndexReader getReader() throws IOException {
+ IndexReader getReader(boolean applyAllDeletes) throws IOException {
ensureOpen();
final long tStart = System.currentTimeMillis();
@@ -351,8 +357,8 @@ public class IndexWriter implements Clos
// just like we do when loading segments_N
IndexReader r;
synchronized(this) {
- flush(false, true);
- r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs);
+ flush(false, applyAllDeletes);
+ r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes);
if (infoStream != null) {
message("return reader version=" + r.getVersion() + " reader=" + r);
}
@@ -365,6 +371,13 @@ public class IndexWriter implements Clos
return r;
}
+ // Used for all SegmentReaders we open
+ private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());
+
+ Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
+ return readerFinishedListeners;
+ }
+
/** Holds shared SegmentReader instances. IndexWriter uses
* SegmentReaders for 1) applying deletes, 2) doing
* merges, 3) handing out a real-time reader. This pool
@@ -376,8 +389,7 @@ public class IndexWriter implements Clos
private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
- /** Forcefully clear changes for the specified segments,
- * and remove from the pool. This is called on successful merge. */
+ /** Forcefully clear changes for the specified segments. This is called on successful merge. */
synchronized void clear(SegmentInfos infos) throws IOException {
if (infos == null) {
for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
@@ -385,8 +397,9 @@ public class IndexWriter implements Clos
}
} else {
for (final SegmentInfo info: infos) {
- if (readerMap.containsKey(info)) {
- readerMap.get(info).hasChanges = false;
+ final SegmentReader r = readerMap.get(info);
+ if (r != null) {
+ r.hasChanges = false;
}
}
}
@@ -395,8 +408,8 @@ public class IndexWriter implements Clos
// used only by asserts
public synchronized boolean infoIsLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info);
- assert idx != -1;
- assert segmentInfos.get(idx) == info;
+ assert idx != -1: "info=" + info + " isn't in pool";
+ assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
return true;
}
@@ -467,6 +480,21 @@ public class IndexWriter implements Clos
return false;
}
+ public synchronized void drop(SegmentInfos infos) throws IOException {
+ for(SegmentInfo info : infos) {
+ drop(info);
+ }
+ }
+
+ public synchronized void drop(SegmentInfo info) throws IOException {
+ final SegmentReader sr = readerMap.get(info);
+ if (sr != null) {
+ sr.hasChanges = false;
+ readerMap.remove(info);
+ sr.close();
+ }
+ }
+
/** Remove all our references to readers, and commits
* any pending changes. */
synchronized void close() throws IOException {
@@ -504,19 +532,18 @@ public class IndexWriter implements Clos
* Commit all segment reader in the pool.
* @throws IOException
*/
- synchronized void commit() throws IOException {
+ synchronized void commit(SegmentInfos infos) throws IOException {
// We invoke deleter.checkpoint below, so we must be
// sync'd on IW:
assert Thread.holdsLock(IndexWriter.this);
- for (Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {
+ for (SegmentInfo info : infos) {
- SegmentReader sr = ent.getValue();
- if (sr.hasChanges) {
- assert infoIsLive(sr.getSegmentInfo());
+ final SegmentReader sr = readerMap.get(info);
+ if (sr != null && sr.hasChanges) {
+ assert infoIsLive(info);
sr.doCommit(null);
-
// Must checkpoint w/ deleter, because this
// segment reader will have created new _X_N.del
// file.
@@ -574,6 +601,7 @@ public class IndexWriter implements Clos
// synchronized
// Returns a ref, which we xfer to readerMap:
sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
+ sr.readerFinishedListeners = readerFinishedListeners;
if (info.dir == directory) {
// Only pool if reader is not external
@@ -676,7 +704,7 @@ public class IndexWriter implements Clos
* according <code>conf.getOpenMode()</code>.
* @param conf
* the configuration settings according to which IndexWriter should
- * be initalized.
+ * be initialized.
* @throws CorruptIndexException
* if the index is corrupt
* @throws LockObtainFailedException
@@ -701,8 +729,8 @@ public class IndexWriter implements Clos
mergedSegmentWarmer = conf.getMergedSegmentWarmer();
codecs = conf.getCodecProvider();
- bufferedDeletes = new BufferedDeletes(messageID);
- bufferedDeletes.setInfoStream(infoStream);
+ bufferedDeletesStream = new BufferedDeletesStream(messageID);
+ bufferedDeletesStream.setInfoStream(infoStream);
poolReaders = conf.getReaderPooling();
OpenMode mode = conf.getOpenMode();
@@ -767,7 +795,10 @@ public class IndexWriter implements Clos
setRollbackSegmentInfos(segmentInfos);
- docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getIndexerThreadPool(), getCurrentFieldInfos(), bufferedDeletes);
+ // start with previous field numbers, but new FieldInfos
+ fieldInfos = getCurrentFieldInfos();
+ docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getIndexerThreadPool(),
+ fieldInfos.newFieldInfosWithGlobalFieldNumberMap(), bufferedDeletesStream);
docWriter.setInfoStream(infoStream);
// Default deleter (for backwards compatibility) is
@@ -830,23 +861,14 @@ public class IndexWriter implements Clos
private FieldInfos getCurrentFieldInfos() throws IOException {
final FieldInfos fieldInfos;
if (segmentInfos.size() > 0) {
- if (segmentInfos.getFormat() > DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
- // Pre-4.0 index. In this case we sweep all
- // segments, merging their FieldInfos:
fieldInfos = new FieldInfos();
for(SegmentInfo info : segmentInfos) {
final FieldInfos segFieldInfos = getFieldInfos(info);
- final int fieldCount = segFieldInfos.size();
- for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
- fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
+ for (FieldInfo fi : segFieldInfos) {
+ fieldInfos.add(fi);
}
}
} else {
- // Already a 4.0 index; just seed the FieldInfos
- // from the last segment
- fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
- }
- } else {
fieldInfos = new FieldInfos();
}
return fieldInfos;
@@ -854,10 +876,6 @@ public class IndexWriter implements Clos
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
- rollbackSegments = new HashMap<SegmentInfo,Integer>();
- final int size = rollbackSegmentInfos.size();
- for(int i=0;i<size;i++)
- rollbackSegments.put(rollbackSegmentInfos.info(i), Integer.valueOf(i));
}
/**
@@ -919,7 +937,7 @@ public class IndexWriter implements Clos
this.infoStream = infoStream;
docWriter.setInfoStream(infoStream);
deleter.setInfoStream(infoStream);
- bufferedDeletes.setInfoStream(infoStream);
+ bufferedDeletesStream.setInfoStream(infoStream);
if (infoStream != null)
messageState();
}
@@ -1165,7 +1183,7 @@ public class IndexWriter implements Clos
public synchronized boolean hasDeletions() throws IOException {
ensureOpen();
- if (bufferedDeletes.any()) {
+ if (bufferedDeletesStream.any()) {
return true;
}
if (docWriter.anyDeletions()) {
@@ -1504,6 +1522,11 @@ public class IndexWriter implements Clos
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
*
+ * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with <tt>false</tt>, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
+ *
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @see MergePolicy#findMergesForOptimize
@@ -1653,6 +1676,11 @@ public class IndexWriter implements Clos
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
+ *
+ * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with <tt>false</tt>, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
*/
public void expungeDeletes(boolean doWait)
throws CorruptIndexException, IOException {
@@ -1803,6 +1831,18 @@ public class IndexWriter implements Clos
}
}
+ /** Expert: to be used by a {@link MergePolicy} to avoid
+ * selecting merges for segments already being merged.
+ * The returned collection is not cloned, and thus is
+ * only safe to access if you hold IndexWriter's lock
+ * (which you do when IndexWriter invokes the
+ * MergePolicy).
+ *
+ * <p>Do not alter the returned collection! */
+ public synchronized Collection<SegmentInfo> getMergingSegments() {
+ return mergingSegments;
+ }
+
/** Expert: the {@link MergeScheduler} calls this method
* to retrieve the next merge requested by the
* MergePolicy */
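
For illustration only (not part of this commit): as the getMergingSegments() javadoc above notes, a MergePolicy can consult the returned collection, while IndexWriter holds its lock, to avoid proposing segments that are already being merged. A minimal hedged sketch of that filtering step (class and method names hypothetical):

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;

class MergeCandidateFilterSketch {
  // Meant to run while IndexWriter invokes the MergePolicy, i.e. under the
  // writer's lock, which is when reading getMergingSegments() is safe.
  static List<SegmentInfo> eligibleForMerge(SegmentInfos infos, IndexWriter writer) {
    final Collection<SegmentInfo> merging = writer.getMergingSegments();
    final List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
    for (SegmentInfo info : infos) {
      if (!merging.contains(info)) {
        eligible.add(info);  // not involved in a running merge
      }
    }
    return eligible;
  }
}
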
@@ -1860,9 +1900,9 @@ public class IndexWriter implements Clos
mergePolicy.close();
mergeScheduler.close();
- synchronized(this) {
+ bufferedDeletesStream.clear();
- bufferedDeletes.clear();
+ synchronized(this) {
if (pendingCommit != null) {
pendingCommit.rollbackCommit(directory);
@@ -1923,8 +1963,9 @@ public class IndexWriter implements Clos
*
* <p>NOTE: this method will forcefully abort all merges
* in progress. If other threads are running {@link
- * #optimize()} or any of the addIndexes methods, they
- * will receive {@link MergePolicy.MergeAbortedException}s.
+ * #optimize()}, {@link #addIndexes(IndexReader[])} or
+ * {@link #expungeDeletes} methods, they may receive
+ * {@link MergePolicy.MergeAbortedException}s.
*/
public synchronized void deleteAll() throws IOException {
try {
@@ -2039,7 +2080,7 @@ public class IndexWriter implements Clos
deleter.checkpoint(segmentInfos, false);
}
- void addFlushedSegment(SegmentInfo newSegment) throws IOException {
+ void addFlushedSegment(SegmentInfo newSegment, BitVector deletedDocs) throws IOException {
assert newSegment != null;
setDiagnostics(newSegment, "flush");
@@ -2063,6 +2104,38 @@ public class IndexWriter implements Clos
newSegment.setUseCompoundFile(true);
+ // Must write deleted docs after the CFS so we don't
+ // slurp the del file into CFS:
+ if (deletedDocs != null) {
+ final int delCount = deletedDocs.count();
+ assert delCount > 0;
+ newSegment.setDelCount(delCount);
+ newSegment.advanceDelGen();
+ final String delFileName = newSegment.getDelFileName();
+ if (infoStream != null) {
+ message("flush: write " + delCount + " deletes to " + delFileName);
+ }
+ boolean success2 = false;
+ try {
+ // TODO: in the NRT case it'd be better to hand
+ // this del vector over to the
+ // shortly-to-be-opened SegmentReader and let it
+ // carry the changes; there's no reason to use
+ // filesystem as intermediary here.
+ deletedDocs.write(directory, delFileName);
+ success2 = true;
+ } finally {
+ if (!success2) {
+ try {
+ directory.deleteFile(delFileName);
+ } catch (Throwable t) {
+ // suppress this so we keep throwing the
+ // original exception
+ }
+ }
+ }
+ }
+
success = true;
} finally {
if (!success) {
@@ -2264,10 +2337,13 @@ public class IndexWriter implements Clos
* close the writer. See <a href="#OOME">above</a> for details.
* </p>
*
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with <tt>false</tt>, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
+ *
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
ensureOpen();
@@ -2276,7 +2352,7 @@ public class IndexWriter implements Clos
String mergedName = newSegmentName();
SegmentMerger merger = new SegmentMerger(directory, termIndexInterval,
mergedName, null, codecs, payloadProcessorProvider,
- ((FieldInfos) docWriter.getFieldInfos().clone()));
+ fieldInfos.newFieldInfosWithGlobalFieldNumberMap());
for (IndexReader reader : readers) // add new indexes
merger.add(reader);
@@ -2284,8 +2360,8 @@ public class IndexWriter implements Clos
int docCount = merger.merge(); // merge 'em
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
- false, merger.fieldInfos().hasProx(), merger.getSegmentCodecs(),
- merger.fieldInfos().hasVectors());
+ false, merger.getSegmentCodecs(),
+ merger.fieldInfos());
setDiagnostics(info, "addIndexes(IndexReader...)");
boolean useCompoundFile;
@@ -2493,13 +2569,13 @@ public class IndexWriter implements Clos
}
/**
- * Flush all in-memory buffered udpates (adds and deletes)
+ * Flush all in-memory buffered updates (adds and deletes)
* to the Directory.
* @param triggerMerge if true, we may merge segments (if
* deletes or docs were flushed) if necessary
- * @param flushDeletes whether pending deletes should also
+ * @param applyAllDeletes whether pending deletes should also be applied
*/
- protected final void flush(boolean triggerMerge, boolean flushDeletes) throws CorruptIndexException, IOException {
+ protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
// NOTE: this method cannot be sync'd because
// maybeMerge() in turn calls mergeScheduler.merge which
@@ -2510,7 +2586,7 @@ public class IndexWriter implements Clos
// We can be called during close, when closing==true, so we must pass false to ensureOpen:
ensureOpen(false);
- if (doFlush(flushDeletes) && triggerMerge) {
+ if (doFlush(applyAllDeletes) && triggerMerge) {
maybeMerge();
}
}
@@ -2554,10 +2630,10 @@ public class IndexWriter implements Clos
// tiny segments:
if (flushControl.getFlushDeletes() ||
(config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
- bufferedDeletes.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
+ bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
applyAllDeletes = true;
if (infoStream != null) {
- message("force apply deletes bytesUsed=" + bufferedDeletes.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
+ message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
}
}
}
@@ -2567,12 +2643,34 @@ public class IndexWriter implements Clos
message("apply all deletes during flush");
}
flushDeletesCount.incrementAndGet();
- if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, segmentInfos)) {
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos);
+ if (result.anyDeletes) {
checkpoint();
}
+ if (!keepFullyDeletedSegments && result.allDeleted != null) {
+ if (infoStream != null) {
+ message("drop 100% deleted segments: " + result.allDeleted);
+ }
+ for(SegmentInfo info : result.allDeleted) {
+ // If a merge has already registered for this
+ // segment, we leave it in the readerPool; the
+ // merge will skip merging it and will then drop
+ // it once it's done:
+ if (!mergingSegments.contains(info)) {
+ segmentInfos.remove(info);
+ if (readerPool != null) {
+ readerPool.drop(info);
+ }
+ }
+ }
+ checkpoint();
+ }
+ bufferedDeletesStream.prune(segmentInfos);
+ assert !bufferedDeletesStream.any();
+
flushControl.clearDeletes();
} else if (infoStream != null) {
- message("don't apply deletes now delTermCount=" + bufferedDeletes.numTerms() + " bytesUsed=" + bufferedDeletes.bytesUsed());
+ message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
}
doAfterFlush();
@@ -2600,7 +2698,7 @@ public class IndexWriter implements Clos
public final long ramSizeInBytes() {
ensureOpen();
// nocommit
- //return docWriter.bytesUsed() + bufferedDeletes.bytesUsed();
+ //return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
return 0;
}
@@ -2611,28 +2709,12 @@ public class IndexWriter implements Clos
return docWriter.getNumDocs();
}
- private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
-
- int first = segmentInfos.indexOf(merge.segments.info(0));
- if (first == -1)
- throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current index " + segString(), directory);
-
- final int numSegments = segmentInfos.size();
-
- final int numSegmentsToMerge = merge.segments.size();
- for(int i=0;i<numSegmentsToMerge;i++) {
- final SegmentInfo info = merge.segments.info(i);
-
- if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
- if (segmentInfos.indexOf(info) == -1)
- throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
- else
- throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
- directory);
+ private void ensureValidMerge(MergePolicy.OneMerge merge) {
+ for(SegmentInfo info : merge.segments) {
+ if (segmentInfos.indexOf(info) == -1) {
+ throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
}
}
-
- return first;
}
/** Carefully merges deletes for the segments we just
@@ -2657,13 +2739,19 @@ public class IndexWriter implements Clos
// started merging:
int docUpto = 0;
int delCount = 0;
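+ // Track the smallest buffered-deletes gen across the source segments;
+ // it is carried over to the merged segment below: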
+ long minGen = Long.MAX_VALUE;
for(int i=0; i < sourceSegments.size(); i++) {
SegmentInfo info = sourceSegments.info(i);
+ minGen = Math.min(info.getBufferedDeletesGen(), minGen);
int docCount = info.docCount;
- SegmentReader previousReader = merge.readersClone[i];
+ final SegmentReader previousReader = merge.readerClones.get(i);
+ if (previousReader == null) {
+ // Reader was skipped because it was 100% deletions
+ continue;
+ }
final Bits prevDelDocs = previousReader.getDeletedDocs();
- SegmentReader currentReader = merge.readers[i];
+ final SegmentReader currentReader = merge.readers.get(i);
final Bits currentDelDocs = currentReader.getDeletedDocs();
if (previousReader.hasDeletions()) {
@@ -2710,9 +2798,17 @@ public class IndexWriter implements Clos
assert mergedReader.numDeletedDocs() == delCount;
mergedReader.hasChanges = delCount > 0;
+
+ // If new deletes were applied while we were merging
+ // (which happens if, e.g., commit() or getReader() is
+ // called during our merge), then the delGen must have
+ // increased for all of our merged segments:
+ assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();
+
+ mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
}
- /* FIXME if we want to support non-contiguous segment merges */
synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
assert testPoint("startCommitMerge");
@@ -2738,19 +2834,57 @@ public class IndexWriter implements Clos
return false;
}
- final int start = ensureContiguousMerge(merge);
-
commitMergedDeletes(merge, mergedReader);
// If the doc store we are using has been closed and
// is in now compound format (but wasn't when we
// started), then we will switch to the compound
// format as well:
- setMergeDocStoreIsCompoundFile(merge);
- segmentInfos.subList(start, start + merge.segments.size()).clear();
assert !segmentInfos.contains(merge.info);
- segmentInfos.add(start, merge.info);
+
+ final boolean allDeleted = mergedReader.numDocs() == 0;
+
+ if (infoStream != null && allDeleted) {
+ message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
+ }
+
+ final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
+ int segIdx = 0;
+ int newSegIdx = 0;
+ boolean inserted = false;
+ final int curSegCount = segmentInfos.size();
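+ // Rewrite segmentInfos in place: replace the first merged-away segment
+ // with merge.info (unless the merged segment is 100% deleted and we are
+ // dropping such segments), skip the other merged-away segments, and
+ // shift the surviving segments forward; the stale tail is cleared below.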
+ while(segIdx < curSegCount) {
+ final SegmentInfo info = segmentInfos.info(segIdx++);
+ if (mergedAway.contains(info)) {
+ if (!inserted && (!allDeleted || keepFullyDeletedSegments)) {
+ segmentInfos.set(segIdx-1, merge.info);
+ inserted = true;
+ newSegIdx++;
+ }
+ } else {
+ segmentInfos.set(newSegIdx++, info);
+ }
+ }
+
+ // Either we found a place to insert the new segment, or
+ // we did not; the latter can only happen because all of
+ // the segments we merged became deleted while we were
+ // merging, in which case the new segment must also be
+ // all deleted:
+ if (!inserted) {
+ assert allDeleted;
+ if (keepFullyDeletedSegments) {
+ segmentInfos.add(0, merge.info);
+ } else {
+ readerPool.drop(merge.info);
+ }
+ }
+
+ segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
+
+ if (infoStream != null) {
+ message("after commit: " + segString());
+ }
closeMergeReaders(merge, false);
@@ -2763,16 +2897,11 @@ public class IndexWriter implements Clos
// disk, updating SegmentInfo, etc.:
readerPool.clear(merge.segments);
- // remove pending deletes of the segments
- // that were merged, moving them onto the segment just
- // before the merged segment
- // Lock order: IW -> BD
- bufferedDeletes.commitMerge(merge);
-
if (merge.optimize) {
// cascade the optimize:
segmentsToOptimize.add(merge.info);
}
+
return true;
}
@@ -2900,7 +3029,7 @@ public class IndexWriter implements Clos
}
}
- ensureContiguousMerge(merge);
+ ensureValidMerge(merge);
pendingMerges.add(merge);
@@ -2914,8 +3043,9 @@ public class IndexWriter implements Clos
// is running (while synchronized) to avoid a race
// condition where two conflicting merges from different
// threads start
- for(int i=0;i<count;i++)
+ for(int i=0;i<count;i++) {
mergingSegments.add(merge.segments.info(i));
+ }
// Merge is now registered
merge.registerDone = true;
@@ -2927,10 +3057,6 @@ public class IndexWriter implements Clos
final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
boolean success = false;
try {
- // Lock order: IW -> BD
- if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, merge.segments)) {
- checkpoint();
- }
_mergeInit(merge);
success = true;
} finally {
@@ -2954,6 +3080,9 @@ public class IndexWriter implements Clos
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
}
+ // TODO: is there any perf benefit to sorting
+ // merged segments? eg biggest to smallest?
+
if (merge.info != null)
// mergeInit already done
return;
@@ -2964,7 +3093,36 @@ public class IndexWriter implements Clos
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
- merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
+ merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, null, fieldInfos.newFieldInfosWithGlobalFieldNumberMap());
+
+ // Lock order: IW -> BD
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
+
+ if (result.anyDeletes) {
+ checkpoint();
+ }
+
+ if (!keepFullyDeletedSegments && result.allDeleted != null) {
+ if (infoStream != null) {
+ message("drop 100% deleted segments: " + result.allDeleted);
+ }
+ for(SegmentInfo info : result.allDeleted) {
+ segmentInfos.remove(info);
+ if (merge.segments.contains(info)) {
+ mergingSegments.remove(info);
+ merge.segments.remove(info);
+ }
+ }
+ if (readerPool != null) {
+ readerPool.drop(result.allDeleted);
+ }
+ checkpoint();
+ }
+
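+ // Record which deletes gen the new merged segment is up to date with, so
+ // that delete packets arriving while the merge runs still get applied to it: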
+ merge.info.setBufferedDeletesGen(result.gen);
+
+ // Lock order: IW -> BD
+ bufferedDeletesStream.prune(segmentInfos);
Map<String,String> details = new HashMap<String,String>();
details.put("optimize", Boolean.toString(merge.optimize));
@@ -3014,8 +3172,9 @@ public class IndexWriter implements Clos
if (merge.registerDone) {
final SegmentInfos sourceSegments = merge.segments;
final int end = sourceSegments.size();
- for(int i=0;i<end;i++)
+ for(int i=0;i<end;i++) {
mergingSegments.remove(sourceSegments.info(i));
+ }
mergingSegments.remove(merge.info);
merge.registerDone = false;
}
@@ -3023,47 +3182,30 @@ public class IndexWriter implements Clos
runningMerges.remove(merge);
}
- private synchronized void setMergeDocStoreIsCompoundFile(MergePolicy.OneMerge merge) {
- final String mergeDocStoreSegment = merge.info.getDocStoreSegment();
- if (mergeDocStoreSegment != null && !merge.info.getDocStoreIsCompoundFile()) {
- final int size = segmentInfos.size();
- for(int i=0;i<size;i++) {
- final SegmentInfo info = segmentInfos.info(i);
- final String docStoreSegment = info.getDocStoreSegment();
- if (docStoreSegment != null &&
- docStoreSegment.equals(mergeDocStoreSegment) &&
- info.getDocStoreIsCompoundFile()) {
- merge.info.setDocStoreIsCompoundFile(true);
- break;
- }
- }
- }
- }
-
private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
- final int numSegments = merge.segments.size();
+ final int numSegments = merge.readers.size();
if (suppressExceptions) {
// Suppress any new exceptions so we throw the
// original cause
boolean anyChanges = false;
for (int i=0;i<numSegments;i++) {
- if (merge.readers[i] != null) {
+ if (merge.readers.get(i) != null) {
try {
- anyChanges |= readerPool.release(merge.readers[i], false);
+ anyChanges |= readerPool.release(merge.readers.get(i), false);
} catch (Throwable t) {
}
- merge.readers[i] = null;
+ merge.readers.set(i, null);
}
- if (merge.readersClone[i] != null) {
+ if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
try {
- merge.readersClone[i].close();
+ merge.readerClones.get(i).close();
} catch (Throwable t) {
}
// This was a private clone and we had the
// only reference
- assert merge.readersClone[i].getRefCount() == 0: "refCount should be 0 but is " + merge.readersClone[i].getRefCount();
- merge.readersClone[i] = null;
+ assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
+ merge.readerClones.set(i, null);
}
}
if (anyChanges) {
@@ -3071,16 +3213,16 @@ public class IndexWriter implements Clos
}
} else {
for (int i=0;i<numSegments;i++) {
- if (merge.readers[i] != null) {
- readerPool.release(merge.readers[i], true);
- merge.readers[i] = null;
+ if (merge.readers.get(i) != null) {
+ readerPool.release(merge.readers.get(i), true);
+ merge.readers.set(i, null);
}
- if (merge.readersClone[i] != null) {
- merge.readersClone[i].close();
+ if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
+ merge.readerClones.get(i).close();
// This was a private clone and we had the only reference
- assert merge.readersClone[i].getRefCount() == 0;
- merge.readersClone[i] = null;
+ assert merge.readerClones.get(i).getRefCount() == 0;
+ merge.readerClones.set(i, null);
}
}
}
@@ -3099,46 +3241,53 @@ public class IndexWriter implements Clos
int mergedDocCount = 0;
SegmentInfos sourceSegments = merge.segments;
- final int numSegments = sourceSegments.size();
SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge,
codecs, payloadProcessorProvider,
- ((FieldInfos) docWriter.getFieldInfos().clone()));
+ merge.info.getFieldInfos());
if (infoStream != null) {
message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
}
- merge.info.setHasVectors(merger.fieldInfos().hasVectors());
- merge.readers = new SegmentReader[numSegments];
- merge.readersClone = new SegmentReader[numSegments];
+ merge.readers = new ArrayList<SegmentReader>();
+ merge.readerClones = new ArrayList<SegmentReader>();
+
+ merge.info.clearFilesCache();
+
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
try {
int totDocCount = 0;
+ int segUpto = 0;
+ while(segUpto < sourceSegments.size()) {
- for (int i = 0; i < numSegments; i++) {
- final SegmentInfo info = sourceSegments.info(i);
+ final SegmentInfo info = sourceSegments.info(segUpto);
// Hold onto the "live" reader; we will use this to
// commit merged deletes
- SegmentReader reader = merge.readers[i] = readerPool.get(info, true,
- MERGE_READ_BUFFER_SIZE,
- -config.getReaderTermsIndexDivisor());
+ final SegmentReader reader = readerPool.get(info, true,
+ MERGE_READ_BUFFER_SIZE,
+ -config.getReaderTermsIndexDivisor());
+ merge.readers.add(reader);
// We clone the segment readers because other
// deletes may come in while we're merging so we
// need readers that will not change
- SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.clone(true);
- merger.add(clone);
+ final SegmentReader clone = (SegmentReader) reader.clone(true);
+ merge.readerClones.add(clone);
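+ // A reader with no live docs contributes nothing to the merge, so only
+ // add it to the merger when it still has documents: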
+ if (reader.numDocs() > 0) {
+ merger.add(clone);
+ }
totDocCount += clone.numDocs();
+ segUpto++;
}
if (infoStream != null) {
- message("merge: total "+totDocCount+" docs");
+ message("merge: total " + totDocCount + " docs");
}
merge.checkAborted(directory);
@@ -3151,17 +3300,17 @@ public class IndexWriter implements Clos
if (infoStream != null) {
message("merge segmentCodecs=" + merger.getSegmentCodecs());
- message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments);
+ message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size());
}
- anyNonBulkMerges |= merger.getMatchedSubReaderCount() != numSegments;
+ anyNonBulkMerges |= merger.getMatchedSubReaderCount() != merge.readers.size();
- assert mergedDocCount == totDocCount;
+ assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount;
// Very important to do this before opening the reader
// because codec must know if prox was written for
// this segment:
//System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
- merge.info.setHasProx(merger.fieldInfos().hasProx());
+ merge.info.clearFilesCache();
boolean useCompoundFile;
synchronized (this) { // Guard segmentInfos
@@ -3329,6 +3478,19 @@ public class IndexWriter implements Clos
}
}
+ private boolean keepFullyDeletedSegments;
+
+ /** Only for testing.
+ *
+ * @lucene.internal */
+ void keepFullyDeletedSegments() {
+ keepFullyDeletedSegments = true;
+ }
+
+ boolean getKeepFullyDeletedSegments() {
+ return keepFullyDeletedSegments;
+ }
+
// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
Collection<String> files = toSync.files(directory, false);
@@ -3384,9 +3546,9 @@ public class IndexWriter implements Clos
if (infoStream != null)
message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
- readerPool.commit();
-
+ readerPool.commit(segmentInfos);
toSync = (SegmentInfos) segmentInfos.clone();
+
assert filesExist(toSync);
if (commitUserData != null)
@@ -3516,7 +3678,7 @@ public class IndexWriter implements Clos
}
synchronized boolean nrtIsCurrent(SegmentInfos infos) {
- return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletes.any();
+ return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
}
synchronized boolean isClosed() {
@@ -3680,11 +3842,10 @@ public class IndexWriter implements Clos
}
public synchronized boolean flushByRAMUsage(String reason) {
- // nocommit
// final double ramBufferSizeMB = config.getRAMBufferSizeMB();
// if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
// final long limit = (long) (ramBufferSizeMB*1024*1024);
-// long used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+// long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
// if (used >= limit) {
//
// // DocumentsWriter may be able to free up some
@@ -3692,7 +3853,7 @@ public class IndexWriter implements Clos
// // Lock order: FC -> DW
// docWriter.balanceRAM();
//
-// used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+// used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
// if (used >= limit) {
// return setFlushPending("ram full: " + reason, false);
// }
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Tue Feb 22 01:00:39 2011
@@ -556,10 +556,13 @@ public final class IndexWriterConfig imp
/** Sets the termsIndexDivisor passed to any readers that
* IndexWriter opens, for example when applying deletes
* or creating a near-real-time reader in {@link
- * IndexWriter#getReader}. */
+ * IndexWriter#getReader}. If you pass -1, the terms index
+ * won't be loaded by the readers. This is only useful in
+ * advanced situations when you will only .next() through
+ * all terms; attempts to seek will hit an exception. */
public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
- if (divisor <= 0) {
- throw new IllegalArgumentException("divisor must be >= 1 (got " + divisor + ")");
+ if (divisor <= 0 && divisor != -1) {
+ throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
}
readerTermsIndexDivisor = divisor;
return this;
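
A minimal usage sketch of the new -1 value (not part of the patch itself), assuming this branch's IndexWriterConfig(Version, Analyzer) constructor plus the stock RAMDirectory, StandardAnalyzer and Version.LUCENE_40 constant; the class name is illustrative only.

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class NoTermsIndexExample {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        // Internal readers opened by this writer (for applying deletes or
        // serving getReader()) will not load the terms index at all:
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40,
            new StandardAnalyzer(Version.LUCENE_40));
        conf.setReaderTermsIndexDivisor(-1);   // -1 = don't load the terms index
        IndexWriter writer = new IndexWriter(dir, conf);
        // Terms can only be stepped through sequentially via next();
        // seeking into the terms dictionary of such readers will throw.
        writer.close();
      }
    }
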
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java Tue Feb 22 01:00:39 2011
@@ -37,10 +37,4 @@ abstract class InvertedDocConsumer {
/** Attempt to free RAM, returning true if any RAM was
* freed */
abstract boolean freeRAM();
-
- FieldInfos fieldInfos;
-
- void setFieldInfos(FieldInfos fieldInfos) {
- this.fieldInfos = fieldInfos;
}
-}
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java Tue Feb 22 01:00:39 2011
@@ -23,7 +23,6 @@ import java.util.Map;
abstract class InvertedDocEndConsumer {
abstract void flush(Map<FieldInfo, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
abstract void abort();
- abstract void setFieldInfos(FieldInfos fieldInfos);
abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
abstract void startDocument() throws IOException;
abstract void finishDocument() throws IOException;