Posted to commits@lucene.apache.org by ha...@apache.org on 2013/08/30 17:06:49 UTC
svn commit: r1518989 [3/10] - in /lucene/dev/branches/lucene3069: ./
dev-tools/ dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/
dev-tools/maven/solr/core/src/java/ lucene/ lucene/analysis/
lucene/analysis/common/ lucene/analysis/common/src/java/...
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Fri Aug 30 15:06:42 2013
@@ -17,11 +17,6 @@ package org.apache.lucene.codecs.lucene4
* limitations under the License.
*/
-import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.DELTA_COMPRESSED;
-import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.GCD_COMPRESSED;
-import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.TABLE_COMPRESSED;
-import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.UNCOMPRESSED;
-
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
@@ -78,6 +73,22 @@ class Lucene42DocValuesProducer extends
new HashMap<Integer,FST<Long>>();
private final int maxDoc;
+
+
+ static final byte NUMBER = 0;
+ static final byte BYTES = 1;
+ static final byte FST = 2;
+
+ static final int BLOCK_SIZE = 4096;
+
+ static final byte DELTA_COMPRESSED = 0;
+ static final byte TABLE_COMPRESSED = 1;
+ static final byte UNCOMPRESSED = 2;
+ static final byte GCD_COMPRESSED = 3;
+
+ static final int VERSION_START = 0;
+ static final int VERSION_GCD_COMPRESSION = 1;
+ static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.getDocCount();
@@ -88,8 +99,8 @@ class Lucene42DocValuesProducer extends
final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
- Lucene42DocValuesConsumer.VERSION_START,
- Lucene42DocValuesConsumer.VERSION_CURRENT);
+ VERSION_START,
+ VERSION_CURRENT);
numerics = new HashMap<Integer,NumericEntry>();
binaries = new HashMap<Integer,BinaryEntry>();
fsts = new HashMap<Integer,FSTEntry>();
@@ -109,8 +120,8 @@ class Lucene42DocValuesProducer extends
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.openInput(dataName, state.context);
final int version2 = CodecUtil.checkHeader(data, dataCodec,
- Lucene42DocValuesConsumer.VERSION_START,
- Lucene42DocValuesConsumer.VERSION_CURRENT);
+ VERSION_START,
+ VERSION_CURRENT);
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch");
}
@@ -127,7 +138,7 @@ class Lucene42DocValuesProducer extends
int fieldNumber = meta.readVInt();
while (fieldNumber != -1) {
int fieldType = meta.readByte();
- if (fieldType == Lucene42DocValuesConsumer.NUMBER) {
+ if (fieldType == NUMBER) {
NumericEntry entry = new NumericEntry();
entry.offset = meta.readLong();
entry.format = meta.readByte();
@@ -140,11 +151,11 @@ class Lucene42DocValuesProducer extends
default:
throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
}
- if (entry.format != Lucene42DocValuesConsumer.UNCOMPRESSED) {
+ if (entry.format != UNCOMPRESSED) {
entry.packedIntsVersion = meta.readVInt();
}
numerics.put(fieldNumber, entry);
- } else if (fieldType == Lucene42DocValuesConsumer.BYTES) {
+ } else if (fieldType == BYTES) {
BinaryEntry entry = new BinaryEntry();
entry.offset = meta.readLong();
entry.numBytes = meta.readLong();
@@ -155,7 +166,7 @@ class Lucene42DocValuesProducer extends
entry.blockSize = meta.readVInt();
}
binaries.put(fieldNumber, entry);
- } else if (fieldType == Lucene42DocValuesConsumer.FST) {
+ } else if (fieldType == FST) {
FSTEntry entry = new FSTEntry();
entry.offset = meta.readLong();
entry.numOrds = meta.readVLong();
@@ -429,6 +440,15 @@ class Lucene42DocValuesProducer extends
}
};
}
+
+ @Override
+ public Bits getDocsWithField(FieldInfo field) throws IOException {
+ if (field.getDocValuesType() == FieldInfo.DocValuesType.SORTED_SET) {
+ return new SortedSetDocsWithField(getSortedSet(field), maxDoc);
+ } else {
+ return new Bits.MatchAllBits(maxDoc);
+ }
+ }
@Override
public void close() throws IOException {
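
Note: the getDocsWithField override added above delegates SORTED_SET fields to a
SortedSetDocsWithField adapter, since presence must be derived from each document's
ordinals; every other type in this format stores a value for every document, hence
MatchAllBits. A minimal sketch of what such an adapter could look like (the class
body is not part of this diff, so treat it as an assumption):

    // Sketch only: adapts a SortedSetDocValues to Bits by probing ordinals.
    // Single-threaded by design, because SortedSetDocValues iteration is stateful.
    class SortedSetDocsWithField implements Bits {
      private final SortedSetDocValues dv;
      private final int maxDoc;

      SortedSetDocsWithField(SortedSetDocValues dv, int maxDoc) {
        this.dv = dv;
        this.maxDoc = maxDoc;
      }

      @Override
      public boolean get(int index) {
        dv.setDocument(index);
        // A doc with at least one ordinal has a value for this field:
        return dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS;
      }

      @Override
      public int length() {
        return maxDoc;
      }
    }
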
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java Fri Aug 30 15:06:42 2013
@@ -41,7 +41,7 @@ import org.apache.lucene.util.packed.Pac
* </ul>
* @see Lucene42DocValuesFormat
*/
-public final class Lucene42NormsFormat extends NormsFormat {
+public class Lucene42NormsFormat extends NormsFormat {
final float acceptableOverheadRatio;
/**
@@ -67,7 +67,7 @@ public final class Lucene42NormsFormat e
@Override
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
- return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
+ return new Lucene42NormsConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
}
@Override
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html Fri Aug 30 15:06:42 2013
@@ -178,7 +178,7 @@ For each field in each document, a value
that is multiplied into the score for hits on that field.
</li>
<li>
-{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vectors}.
+{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vectors}.
For each field in each document, the term vector (sometimes
called document vector) may be stored. A term vector consists of term text and
term frequency. To add Term Vectors to your index see the
@@ -299,17 +299,17 @@ systems that frequently run out of file
<td>Encodes additional scoring factors or other per-document information.</td>
</tr>
<tr>
-<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Index}</td>
+<td>{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Index}</td>
<td>.tvx</td>
<td>Stores offset into the document data file</td>
</tr>
<tr>
-<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Documents}</td>
+<td>{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Documents}</td>
<td>.tvd</td>
<td>Contains information about each document that has term vectors</td>
</tr>
<tr>
-<td>{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Fields}</td>
+<td>{@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Fields}</td>
<td>.tvf</td>
<td>The field level info about term vectors</td>
</tr>
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/package.html?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/package.html (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/package.html Fri Aug 30 15:06:42 2013
@@ -61,9 +61,13 @@ name of your codec.
If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings
formats for different fields, then you can register your custom postings format in the same way (in
META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default
- {@link org.apache.lucene.codecs.lucene42.Lucene42Codec} and override
- {@link org.apache.lucene.codecs.lucene42.Lucene42Codec#getPostingsFormatForField(String)} to return your custom
+ {@link org.apache.lucene.codecs.lucene45.Lucene45Codec} and override
+ {@link org.apache.lucene.codecs.lucene45.Lucene45Codec#getPostingsFormatForField(String)} to return your custom
postings format.
</p>
+<p>
+ Similarly, if you just want to customise the {@link org.apache.lucene.codecs.DocValuesFormat} per-field, have
+ a look at {@link org.apache.lucene.codecs.lucene45.Lucene45Codec#getDocValuesFormatForField(String)}.
+</p>
</body>
</html>
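
As a hedged illustration of the two paragraphs above (the codec subclass, format
names, and the "id" field are hypothetical, not part of this commit):

    // Per-field postings and doc values, as described in the package docs:
    public class MyCodec extends Lucene45Codec {
      // Hypothetical formats, registered via
      // META-INF/services/org.apache.lucene.codecs.PostingsFormat and
      // META-INF/services/org.apache.lucene.codecs.DocValuesFormat.
      private final PostingsFormat postings = PostingsFormat.forName("MyPostings");
      private final DocValuesFormat docValues = DocValuesFormat.forName("MyDocValues");

      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        // Custom postings only for the "id" field; the default everywhere else.
        return "id".equals(field) ? postings : super.getPostingsFormatForField(field);
      }

      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        return "id".equals(field) ? docValues : super.getDocValuesFormatForField(field);
      }
    }
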
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java Fri Aug 30 15:06:42 2013
@@ -36,6 +36,7 @@ import org.apache.lucene.index.SegmentRe
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -265,6 +266,12 @@ public abstract class PerFieldDocValuesF
DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getSortedSet(field);
}
+
+ @Override
+ public Bits getDocsWithField(FieldInfo field) throws IOException {
+ DocValuesProducer producer = fields.get(field.name);
+ return producer == null ? null : producer.getDocsWithField(field);
+ }
@Override
public void close() throws IOException {
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java Fri Aug 30 15:06:42 2013
@@ -207,6 +207,12 @@ public abstract class AtomicReader exten
* this field. The returned instance should only be
* used by a single thread. */
public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException;
+
+ /** Returns a {@link Bits} of size <code>reader.maxDoc()</code>,
+ * with a set bit for each docid that has a value for this field,
+ * or null if no DocValues were indexed for this field. The
+ * returned instance should only be used by a single thread. */
+ public abstract Bits getDocsWithField(String field) throws IOException;
/** Returns {@link NumericDocValues} representing norms
* for this field, or null if no {@link NumericDocValues}
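
A hedged usage sketch of the new method (the reader, field name, and loop are
illustrative assumptions, not from this commit):

    // Sketch: distinguish a genuinely stored 0 from a missing value.
    NumericDocValues values = reader.getNumericDocValues("price"); // "price" is hypothetical
    Bits docsWithField = reader.getDocsWithField("price");
    if (values != null && docsWithField != null) {
      for (int doc = 0; doc < reader.maxDoc(); doc++) {
        if (docsWithField.get(doc)) {
          long v = values.get(doc);   // the field really has a value here
        } else {
          // field is missing for this doc; values.get(doc) would return 0
        }
      }
    }
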
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java Fri Aug 30 15:06:42 2013
@@ -22,29 +22,49 @@ import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
-import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
-
-
/** Buffers up pending byte[] per doc, then flushes when
* segment flushes. */
class BinaryDocValuesWriter extends DocValuesWriter {
- private final ByteBlockPool pool;
+ /** Maximum length for a binary field; we set this to "a
+ * bit" below Integer.MAX_VALUE because the exact max
+ * allowed byte[] is JVM dependent, so we want to avoid
+ * a case where a large value worked in one JVM but
+ * failed later at search time with a different JVM. */
+ private static final int MAX_LENGTH = Integer.MAX_VALUE-256;
+
+ // 32 KB block sizes for PagedBytes storage:
+ private final static int BLOCK_BITS = 15;
+
+ private final PagedBytes bytes;
+ private final DataOutput bytesOut;
+
+ private final Counter iwBytesUsed;
private final AppendingDeltaPackedLongBuffer lengths;
+ private final OpenBitSet docsWithField;
private final FieldInfo fieldInfo;
- private int addedValues = 0;
+ private int addedValues;
+ private long bytesUsed;
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
this.fieldInfo = fieldInfo;
- this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed));
+ this.bytes = new PagedBytes(BLOCK_BITS);
+ this.bytesOut = bytes.getDataOutput();
this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
+ this.iwBytesUsed = iwBytesUsed;
+ this.docsWithField = new OpenBitSet();
+ this.bytesUsed = docsWithFieldBytesUsed();
+ iwBytesUsed.addAndGet(bytesUsed);
}
public void addValue(int docID, BytesRef value) {
@@ -54,10 +74,10 @@ class BinaryDocValuesWriter extends DocV
if (value == null) {
throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\": null value not allowed");
}
- if (value.length > (BYTE_BLOCK_SIZE - 2)) {
- throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
+ if (value.length > MAX_LENGTH) {
+ throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + MAX_LENGTH);
}
-
+
// Fill in any holes:
while(addedValues < docID) {
addedValues++;
@@ -65,7 +85,25 @@ class BinaryDocValuesWriter extends DocV
}
addedValues++;
lengths.add(value.length);
- pool.append(value);
+ try {
+ bytesOut.writeBytes(value.bytes, value.offset, value.length);
+ } catch (IOException ioe) {
+ // Should never happen!
+ throw new RuntimeException(ioe);
+ }
+ docsWithField.set(docID);
+ updateBytesUsed();
+ }
+
+ private long docsWithFieldBytesUsed() {
+ // size of the long[] + some overhead
+ return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
+ }
+
+ private void updateBytesUsed() {
+ final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed() + docsWithFieldBytesUsed();
+ iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
+ bytesUsed = newBytesUsed;
}
@Override
@@ -75,6 +113,7 @@ class BinaryDocValuesWriter extends DocV
@Override
public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
final int maxDoc = state.segmentInfo.getDocCount();
+ bytes.freeze(false);
dvConsumer.addBinaryField(fieldInfo,
new Iterable<BytesRef>() {
@Override
@@ -92,10 +131,10 @@ class BinaryDocValuesWriter extends DocV
private class BytesIterator implements Iterator<BytesRef> {
final BytesRef value = new BytesRef();
final AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = lengths.iterator();
+ final DataInput bytesIterator = bytes.getDataInput();
final int size = (int) lengths.size();
final int maxDoc;
int upto;
- long byteOffset;
BytesIterator(int maxDoc) {
this.maxDoc = maxDoc;
@@ -111,19 +150,27 @@ class BinaryDocValuesWriter extends DocV
if (!hasNext()) {
throw new NoSuchElementException();
}
+ final BytesRef v;
if (upto < size) {
int length = (int) lengthsIterator.next();
value.grow(length);
value.length = length;
- pool.readBytes(byteOffset, value.bytes, value.offset, value.length);
- byteOffset += length;
+ try {
+ bytesIterator.readBytes(value.bytes, value.offset, value.length);
+ } catch (IOException ioe) {
+ // Should never happen!
+ throw new RuntimeException(ioe);
+ }
+ if (docsWithField.get(upto)) {
+ v = value;
+ } else {
+ v = null;
+ }
} else {
- // This is to handle last N documents not having
- // this DV field in the end of the segment:
- value.length = 0;
+ v = null;
}
upto++;
- return value;
+ return v;
}
@Override
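
The rewrite above replaces the ByteBlockPool, whose per-value ceiling of
BYTE_BLOCK_SIZE - 2 imposed the old length check, with PagedBytes plus a separate
lengths buffer. A minimal sketch of the write-freeze-read pattern it relies on
(the payload values are made up):

    PagedBytes bytes = new PagedBytes(15);      // 1 << 15 = 32 KB blocks, as above
    DataOutput out = bytes.getDataOutput();
    byte[] payload = new byte[] {1, 2, 3};      // hypothetical value
    out.writeBytes(payload, 0, payload.length); // append; lengths tracked separately
    bytes.freeze(false);                        // freeze before reading back
    DataInput in = bytes.getDataInput();
    byte[] copy = new byte[3];
    in.readBytes(copy, 0, copy.length);         // values come back in write order
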
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Fri Aug 30 15:06:42 2013
@@ -496,6 +496,11 @@ public class CheckIndex {
msg(infoStream, " " + (1+i) + " of " + numSegments + ": name=" + info.info.name + " docCount=" + info.info.getDocCount());
segInfoStat.name = info.info.name;
segInfoStat.docCount = info.info.getDocCount();
+
+ final String version = info.info.getVersion();
+ if (info.info.getDocCount() <= 0 && version != null && versionComparator.compare(version, "4.5") >= 0) {
+ throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.getDocCount());
+ }
int toLoseDocCount = info.info.getDocCount();
@@ -1275,7 +1280,8 @@ public class CheckIndex {
if (reader.getBinaryDocValues(fieldInfo.name) != null ||
reader.getNumericDocValues(fieldInfo.name) != null ||
reader.getSortedDocValues(fieldInfo.name) != null ||
- reader.getSortedSetDocValues(fieldInfo.name) != null) {
+ reader.getSortedSetDocValues(fieldInfo.name) != null ||
+ reader.getDocsWithField(fieldInfo.name) != null) {
throw new RuntimeException("field: " + fieldInfo.name + " has docvalues but should omit them!");
}
}
@@ -1296,26 +1302,37 @@ public class CheckIndex {
return status;
}
- private static void checkBinaryDocValues(String fieldName, AtomicReader reader, BinaryDocValues dv) {
+ private static void checkBinaryDocValues(String fieldName, AtomicReader reader, BinaryDocValues dv, Bits docsWithField) {
BytesRef scratch = new BytesRef();
for (int i = 0; i < reader.maxDoc(); i++) {
dv.get(i, scratch);
assert scratch.isValid();
+ if (docsWithField.get(i) == false && scratch.length > 0) {
+ throw new RuntimeException("dv for field: " + fieldName + " is missing but has value=" + scratch + " for doc: " + i);
+ }
}
}
- private static void checkSortedDocValues(String fieldName, AtomicReader reader, SortedDocValues dv) {
- checkBinaryDocValues(fieldName, reader, dv);
+ private static void checkSortedDocValues(String fieldName, AtomicReader reader, SortedDocValues dv, Bits docsWithField) {
+ checkBinaryDocValues(fieldName, reader, dv, docsWithField);
final int maxOrd = dv.getValueCount()-1;
FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount());
int maxOrd2 = -1;
for (int i = 0; i < reader.maxDoc(); i++) {
int ord = dv.getOrd(i);
- if (ord < 0 || ord > maxOrd) {
+ if (ord == -1) {
+ if (docsWithField.get(i)) {
+ throw new RuntimeException("dv for field: " + fieldName + " has -1 ord but is not marked missing for doc: " + i);
+ }
+ } else if (ord < -1 || ord > maxOrd) {
throw new RuntimeException("ord out of bounds: " + ord);
+ } else {
+ if (!docsWithField.get(i)) {
+ throw new RuntimeException("dv for field: " + fieldName + " is missing but has ord=" + ord + " for doc: " + i);
+ }
+ maxOrd2 = Math.max(maxOrd2, ord);
+ seenOrds.set(ord);
}
- maxOrd2 = Math.max(maxOrd2, ord);
- seenOrds.set(ord);
}
if (maxOrd != maxOrd2) {
throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
@@ -1337,7 +1354,7 @@ public class CheckIndex {
}
}
- private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv) {
+ private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField) {
final long maxOrd = dv.getValueCount()-1;
OpenBitSet seenOrds = new OpenBitSet(dv.getValueCount());
long maxOrd2 = -1;
@@ -1345,16 +1362,28 @@ public class CheckIndex {
dv.setDocument(i);
long lastOrd = -1;
long ord;
- while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
- if (ord <= lastOrd) {
- throw new RuntimeException("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i);
+ if (docsWithField.get(i)) {
+ int ordCount = 0;
+ while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ ordCount++;
+ if (ord <= lastOrd) {
+ throw new RuntimeException("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i);
+ }
+ if (ord < 0 || ord > maxOrd) {
+ throw new RuntimeException("ord out of bounds: " + ord);
+ }
+ lastOrd = ord;
+ maxOrd2 = Math.max(maxOrd2, ord);
+ seenOrds.set(ord);
}
- if (ord < 0 || ord > maxOrd) {
- throw new RuntimeException("ord out of bounds: " + ord);
+ if (ordCount == 0) {
+ throw new RuntimeException("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + i);
+ }
+ } else {
+ long o = dv.nextOrd();
+ if (o != SortedSetDocValues.NO_MORE_ORDS) {
+ throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has ord=" + o + " for doc: " + i);
}
- lastOrd = ord;
- maxOrd2 = Math.max(maxOrd2, ord);
- seenOrds.set(ord);
}
}
if (maxOrd != maxOrd2) {
@@ -1378,17 +1407,26 @@ public class CheckIndex {
}
}
- private static void checkNumericDocValues(String fieldName, AtomicReader reader, NumericDocValues ndv) {
+ private static void checkNumericDocValues(String fieldName, AtomicReader reader, NumericDocValues ndv, Bits docsWithField) {
for (int i = 0; i < reader.maxDoc(); i++) {
- ndv.get(i);
+ long value = ndv.get(i);
+ if (docsWithField.get(i) == false && value != 0) {
+ throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i);
+ }
}
}
private static void checkDocValues(FieldInfo fi, AtomicReader reader, PrintStream infoStream, DocValuesStatus status) throws Exception {
+ Bits docsWithField = reader.getDocsWithField(fi.name);
+ if (docsWithField == null) {
+ throw new RuntimeException(fi.name + " docsWithField does not exist");
+ } else if (docsWithField.length() != reader.maxDoc()) {
+ throw new RuntimeException(fi.name + " docsWithField has incorrect length: " + docsWithField.length() + ", expected: " + reader.maxDoc());
+ }
switch(fi.getDocValuesType()) {
case SORTED:
status.totalSortedFields++;
- checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name));
+ checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getNumericDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null) {
@@ -1397,7 +1435,7 @@ public class CheckIndex {
break;
case SORTED_SET:
status.totalSortedSetFields++;
- checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name));
+ checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getNumericDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null) {
@@ -1406,7 +1444,7 @@ public class CheckIndex {
break;
case BINARY:
status.totalBinaryFields++;
- checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name));
+ checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name), docsWithField);
if (reader.getNumericDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null) {
@@ -1415,7 +1453,7 @@ public class CheckIndex {
break;
case NUMERIC:
status.totalNumericFields++;
- checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name));
+ checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name), docsWithField);
if (reader.getBinaryDocValues(fi.name) != null ||
reader.getSortedDocValues(fi.name) != null ||
reader.getSortedSetDocValues(fi.name) != null) {
@@ -1430,7 +1468,7 @@ public class CheckIndex {
private static void checkNorms(FieldInfo fi, AtomicReader reader, PrintStream infoStream) throws IOException {
switch(fi.getNormType()) {
case NUMERIC:
- checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name));
+ checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name), new Bits.MatchAllBits(reader.maxDoc()));
break;
default:
throw new AssertionError("wtf: " + fi.getNormType());
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java Fri Aug 30 15:06:42 2013
@@ -175,7 +175,9 @@ final class DocInverterPerField extends
}
// trigger streams to perform end-of-stream operations
stream.end();
-
+ // TODO: maybe add some safety? then again, it's already checked
+ // when we come back around to the field...
+ fieldState.position += posIncrAttribute.getPositionIncrement();
fieldState.offset += offsetAttribute.endOffset();
success2 = true;
} finally {
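
For context on the one-line fix above: after TokenStream.end(), the
position-increment attribute reflects the trailing increment (e.g. from tokens
removed at the end of the stream), so accumulating it keeps positions consistent
across multi-valued fields. A hedged sketch of the standard consumer pattern
(analyzer, field name, and input text are hypothetical):

    TokenStream stream = analyzer.tokenStream("body", new StringReader("some text"));
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    stream.reset();
    int position = 0;
    while (stream.incrementToken()) {
      position += posIncr.getPositionIncrement();
    }
    stream.end();                                 // exposes the trailing increment
    position += posIncr.getPositionIncrement();   // same accumulation as the fix above
    stream.close();
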
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java Fri Aug 30 15:06:42 2013
@@ -143,7 +143,7 @@ final class DocValuesProcessor extends S
DocValuesWriter writer = writers.get(fieldInfo.name);
NumericDocValuesWriter numericWriter;
if (writer == null) {
- numericWriter = new NumericDocValuesWriter(fieldInfo, bytesUsed);
+ numericWriter = new NumericDocValuesWriter(fieldInfo, bytesUsed, true);
writers.put(fieldInfo.name, numericWriter);
} else if (!(writer instanceof NumericDocValuesWriter)) {
throw new IllegalArgumentException("Incompatible DocValues type: field \"" + fieldInfo.name + "\" changed from " + getTypeDesc(writer) + " to numeric");
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java Fri Aug 30 15:06:42 2013
@@ -19,18 +19,18 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
-import java.util.List;
+import java.util.HashSet;
+import java.util.Queue;
+import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DocumentsWriterFlushQueue.SegmentFlushTicket;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
-import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
-import org.apache.lucene.index.FieldInfos.FieldNumbers;
+import org.apache.lucene.index.IndexWriter.Event;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.InfoStream;
@@ -100,19 +100,15 @@ import org.apache.lucene.util.InfoStream
*/
final class DocumentsWriter {
- Directory directory;
+ private final Directory directory;
private volatile boolean closed;
- final InfoStream infoStream;
- Similarity similarity;
+ private final InfoStream infoStream;
- List<String> newFiles;
+ private final LiveIndexWriterConfig config;
- final IndexWriter indexWriter;
- final LiveIndexWriterConfig indexWriterConfig;
-
- private AtomicInteger numDocsInRAM = new AtomicInteger(0);
+ private final AtomicInteger numDocsInRAM = new AtomicInteger(0);
// TODO: cut over to BytesRefHash in BufferedDeletes
volatile DocumentsWriterDeleteQueue deleteQueue = new DocumentsWriterDeleteQueue();
@@ -125,73 +121,72 @@ final class DocumentsWriter {
*/
private volatile boolean pendingChangesInCurrentFullFlush;
- private Collection<String> abortedFiles; // List of files that were written before last abort()
-
- final IndexingChain chain;
-
final DocumentsWriterPerThreadPool perThreadPool;
final FlushPolicy flushPolicy;
final DocumentsWriterFlushControl flushControl;
+ private final IndexWriter writer;
+ private final Queue<Event> events;
+
- final Codec codec;
- DocumentsWriter(Codec codec, LiveIndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers,
- BufferedDeletesStream bufferedDeletesStream) {
- this.codec = codec;
+ DocumentsWriter(IndexWriter writer, LiveIndexWriterConfig config, Directory directory) {
this.directory = directory;
- this.indexWriter = writer;
+ this.config = config;
this.infoStream = config.getInfoStream();
- this.similarity = config.getSimilarity();
- this.indexWriterConfig = writer.getConfig();
this.perThreadPool = config.getIndexerThreadPool();
- this.chain = config.getIndexingChain();
- this.perThreadPool.initialize(this, globalFieldNumbers, config);
flushPolicy = config.getFlushPolicy();
- assert flushPolicy != null;
- flushPolicy.init(this);
- flushControl = new DocumentsWriterFlushControl(this, config);
+ this.writer = writer;
+ this.events = new ConcurrentLinkedQueue<Event>();
+ flushControl = new DocumentsWriterFlushControl(this, config, writer.bufferedDeletesStream);
}
-
- synchronized void deleteQueries(final Query... queries) throws IOException {
+
+ synchronized boolean deleteQueries(final Query... queries) throws IOException {
+ // TODO why is this synchronized?
+ final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue;
deleteQueue.addDelete(queries);
flushControl.doOnDelete();
- if (flushControl.doApplyAllDeletes()) {
- applyAllDeletes(deleteQueue);
- }
+ return applyAllDeletes(deleteQueue);
}
// TODO: we could check w/ FreqProxTermsWriter: if the
// term doesn't exist, don't bother buffering into the
// per-DWPT map (but still must go into the global map)
- synchronized void deleteTerms(final Term... terms) throws IOException {
+ synchronized boolean deleteTerms(final Term... terms) throws IOException {
+ // TODO why is this synchronized?
final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue;
deleteQueue.addDelete(terms);
flushControl.doOnDelete();
- if (flushControl.doApplyAllDeletes()) {
- applyAllDeletes(deleteQueue);
- }
+ return applyAllDeletes(deleteQueue);
}
DocumentsWriterDeleteQueue currentDeleteSession() {
return deleteQueue;
}
- private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException {
- if (deleteQueue != null && !flushControl.isFullFlush()) {
- ticketQueue.addDeletesAndPurge(this, deleteQueue);
+ private final boolean applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException {
+ if (flushControl.doApplyAllDeletes()) {
+ if (deleteQueue != null && !flushControl.isFullFlush()) {
+ ticketQueue.addDeletes(deleteQueue);
+ }
+ putEvent(ApplyDeletesEvent.INSTANCE); // apply deletes event forces a purge
+ return true;
}
- indexWriter.applyAllDeletes();
- indexWriter.flushCount.incrementAndGet();
+ return false;
}
+
+ final int purgeBuffer(IndexWriter writer, boolean forced) throws IOException {
+ if (forced) {
+ return ticketQueue.forcePurge(writer);
+ } else {
+ return ticketQueue.tryPurge(writer);
+ }
+ }
+
/** Returns how many docs are currently buffered in RAM. */
int getNumDocs() {
return numDocsInRAM.get();
}
- Collection<String> abortedFiles() {
- return abortedFiles;
- }
-
private void ensureOpen() throws AlreadyClosedException {
if (closed) {
throw new AlreadyClosedException("this IndexWriter is closed");
@@ -202,45 +197,37 @@ final class DocumentsWriter {
* updating the index files) and must discard all
* currently buffered docs. This resets our state,
* discarding any docs added since last flush. */
- synchronized void abort() {
+ synchronized void abort(IndexWriter writer) {
+ assert !Thread.holdsLock(writer) : "IndexWriter lock should never be held when aborting";
boolean success = false;
-
+ final Set<String> newFilesSet = new HashSet<String>();
try {
deleteQueue.clear();
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "abort");
}
-
final int limit = perThreadPool.getActiveThreadState();
for (int i = 0; i < limit; i++) {
final ThreadState perThread = perThreadPool.getThreadState(i);
perThread.lock();
try {
- if (perThread.isActive()) { // we might be closed
- try {
- perThread.dwpt.abort();
- } finally {
- perThread.dwpt.checkAndResetHasAborted();
- flushControl.doOnAbort(perThread);
- }
- } else {
- assert closed;
- }
+ abortThreadState(perThread, newFilesSet);
} finally {
perThread.unlock();
}
}
- flushControl.abortPendingFlushes();
+ flushControl.abortPendingFlushes(newFilesSet);
+ putEvent(new DeleteNewFilesEvent(newFilesSet));
flushControl.waitForFlush();
success = true;
} finally {
if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", "done abort; abortedFiles=" + abortedFiles + " success=" + success);
+ infoStream.message("DW", "done abort; abortedFiles=" + newFilesSet + " success=" + success);
}
}
}
- synchronized void lockAndAbortAll() {
+ synchronized void lockAndAbortAll(IndexWriter indexWriter) {
assert indexWriter.holdsFullFlushLock();
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "lockAndAbortAll");
@@ -249,20 +236,15 @@ final class DocumentsWriter {
try {
deleteQueue.clear();
final int limit = perThreadPool.getMaxThreadStates();
+ final Set<String> newFilesSet = new HashSet<String>();
for (int i = 0; i < limit; i++) {
final ThreadState perThread = perThreadPool.getThreadState(i);
perThread.lock();
- if (perThread.isActive()) { // we might be closed or
- try {
- perThread.dwpt.abort();
- } finally {
- perThread.dwpt.checkAndResetHasAborted();
- flushControl.doOnAbort(perThread);
- }
- }
+ abortThreadState(perThread, newFilesSet);
}
deleteQueue.clear();
- flushControl.abortPendingFlushes();
+ flushControl.abortPendingFlushes(newFilesSet);
+ putEvent(new DeleteNewFilesEvent(newFilesSet));
flushControl.waitForFlush();
success = true;
} finally {
@@ -271,12 +253,31 @@ final class DocumentsWriter {
}
if (!success) {
// if something happens here we unlock all states again
- unlockAllAfterAbortAll();
+ unlockAllAfterAbortAll(indexWriter);
+ }
+ }
+ }
+
+ private final void abortThreadState(final ThreadState perThread, Set<String> newFiles) {
+ assert perThread.isHeldByCurrentThread();
+ if (perThread.isActive()) { // we might be closed
+ if (perThread.isInitialized()) {
+ try {
+ subtractFlushedNumDocs(perThread.dwpt.getNumDocsInRAM());
+ perThread.dwpt.abort(newFiles);
+ } finally {
+ perThread.dwpt.checkAndResetHasAborted();
+ flushControl.doOnAbort(perThread);
+ }
+ } else {
+ flushControl.doOnAbort(perThread);
}
+ } else {
+ assert closed;
}
}
- final synchronized void unlockAllAfterAbortAll() {
+ final synchronized void unlockAllAfterAbortAll(IndexWriter indexWriter) {
assert indexWriter.holdsFullFlushLock();
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "unlockAll");
@@ -334,7 +335,7 @@ final class DocumentsWriter {
private boolean preUpdate() throws IOException {
ensureOpen();
- boolean maybeMerge = false;
+ boolean hasEvents = false;
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
// Help out flushing any queued DWPTs so we can un-stall:
if (infoStream.isEnabled("DW")) {
@@ -345,7 +346,7 @@ final class DocumentsWriter {
DocumentsWriterPerThread flushingDWPT;
while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
// Don't push the delete here since the update could fail!
- maybeMerge |= doFlush(flushingDWPT);
+ hasEvents |= doFlush(flushingDWPT);
}
if (infoStream.isEnabled("DW")) {
@@ -361,28 +362,35 @@ final class DocumentsWriter {
infoStream.message("DW", "continue indexing after helping out flushing DocumentsWriter is healthy");
}
}
- return maybeMerge;
+ return hasEvents;
}
- private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean maybeMerge) throws IOException {
- if (flushControl.doApplyAllDeletes()) {
- applyAllDeletes(deleteQueue);
- }
+ private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean hasEvents) throws IOException {
+ hasEvents |= applyAllDeletes(deleteQueue);
if (flushingDWPT != null) {
- maybeMerge |= doFlush(flushingDWPT);
+ hasEvents |= doFlush(flushingDWPT);
} else {
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
if (nextPendingFlush != null) {
- maybeMerge |= doFlush(nextPendingFlush);
+ hasEvents |= doFlush(nextPendingFlush);
}
}
- return maybeMerge;
+ return hasEvents;
+ }
+
+ private final void ensureInitialized(ThreadState state) {
+ if (state.isActive() && state.dwpt == null) {
+ final FieldInfos.Builder infos = new FieldInfos.Builder(
+ writer.globalFieldNumberMap);
+ state.dwpt = new DocumentsWriterPerThread(writer.newSegmentName(),
+ directory, config, infoStream, deleteQueue, infos);
+ }
}
boolean updateDocuments(final Iterable<? extends IndexDocument> docs, final Analyzer analyzer,
final Term delTerm) throws IOException {
- boolean maybeMerge = preUpdate();
+ boolean hasEvents = preUpdate();
final ThreadState perThread = flushControl.obtainAndLock();
final DocumentsWriterPerThread flushingDWPT;
@@ -392,13 +400,19 @@ final class DocumentsWriter {
ensureOpen();
assert false: "perThread is not active but we are still open";
}
-
+ ensureInitialized(perThread);
+ assert perThread.isInitialized();
final DocumentsWriterPerThread dwpt = perThread.dwpt;
+ final int dwptNumDocs = dwpt.getNumDocsInRAM();
try {
final int docCount = dwpt.updateDocuments(docs, analyzer, delTerm);
numDocsInRAM.addAndGet(docCount);
} finally {
if (dwpt.checkAndResetHasAborted()) {
+ if (!dwpt.pendingFilesToDelete().isEmpty()) {
+ putEvent(new DeleteNewFilesEvent(dwpt.pendingFilesToDelete()));
+ }
+ subtractFlushedNumDocs(dwptNumDocs);
flushControl.doOnAbort(perThread);
}
}
@@ -408,31 +422,35 @@ final class DocumentsWriter {
perThread.unlock();
}
- return postUpdate(flushingDWPT, maybeMerge);
+ return postUpdate(flushingDWPT, hasEvents);
}
boolean updateDocument(final IndexDocument doc, final Analyzer analyzer,
final Term delTerm) throws IOException {
- boolean maybeMerge = preUpdate();
+ boolean hasEvents = preUpdate();
final ThreadState perThread = flushControl.obtainAndLock();
final DocumentsWriterPerThread flushingDWPT;
-
try {
-
if (!perThread.isActive()) {
ensureOpen();
- throw new IllegalStateException("perThread is not active but we are still open");
+ assert false: "perThread is not active but we are still open";
}
-
+ ensureInitialized(perThread);
+ assert perThread.isInitialized();
final DocumentsWriterPerThread dwpt = perThread.dwpt;
+ final int dwptNumDocs = dwpt.getNumDocsInRAM();
try {
dwpt.updateDocument(doc, analyzer, delTerm);
numDocsInRAM.incrementAndGet();
} finally {
if (dwpt.checkAndResetHasAborted()) {
+ if (!dwpt.pendingFilesToDelete().isEmpty()) {
+ putEvent(new DeleteNewFilesEvent(dwpt.pendingFilesToDelete()));
+ }
+ subtractFlushedNumDocs(dwptNumDocs);
flushControl.doOnAbort(perThread);
}
}
@@ -442,13 +460,13 @@ final class DocumentsWriter {
perThread.unlock();
}
- return postUpdate(flushingDWPT, maybeMerge);
+ return postUpdate(flushingDWPT, hasEvents);
}
private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException {
- boolean maybeMerge = false;
+ boolean hasEvents = false;
while (flushingDWPT != null) {
- maybeMerge = true;
+ hasEvents = true;
boolean success = false;
SegmentFlushTicket ticket = null;
try {
@@ -474,9 +492,24 @@ final class DocumentsWriter {
// Each flush is assigned a ticket in the order they acquire the ticketQueue lock
ticket = ticketQueue.addFlushTicket(flushingDWPT);
- // flush concurrently without locking
- final FlushedSegment newSegment = flushingDWPT.flush();
- ticketQueue.addSegment(ticket, newSegment);
+ final int flushingDocsInRam = flushingDWPT.getNumDocsInRAM();
+ boolean dwptSuccess = false;
+ try {
+ // flush concurrently without locking
+ final FlushedSegment newSegment = flushingDWPT.flush();
+ ticketQueue.addSegment(ticket, newSegment);
+ dwptSuccess = true;
+ } finally {
+ subtractFlushedNumDocs(flushingDocsInRam);
+ if (!flushingDWPT.pendingFilesToDelete().isEmpty()) {
+ putEvent(new DeleteNewFilesEvent(flushingDWPT.pendingFilesToDelete()));
+ hasEvents = true;
+ }
+ if (!dwptSuccess) {
+ putEvent(new FlushFailedEvent(flushingDWPT.getSegmentInfo()));
+ hasEvents = true;
+ }
+ }
// flush was successful once we reached this point - new seg. has been assigned to the ticket!
success = true;
} finally {
@@ -496,54 +529,38 @@ final class DocumentsWriter {
// thread in innerPurge can't keep up with all
// other threads flushing segments. In this case
// we forcefully stall the producers.
- ticketQueue.forcePurge(this);
- } else {
- ticketQueue.tryPurge(this);
+ putEvent(ForcedPurgeEvent.INSTANCE);
+ break;
}
-
} finally {
flushControl.doAfterFlush(flushingDWPT);
flushingDWPT.checkAndResetHasAborted();
- indexWriter.flushCount.incrementAndGet();
- indexWriter.doAfterFlush();
}
flushingDWPT = flushControl.nextPendingFlush();
}
-
+ if (hasEvents) {
+ putEvent(MergePendingEvent.INSTANCE);
+ }
// If deletes alone are consuming > 1/2 our RAM
// buffer, force them all to apply now. This is to
// prevent too-frequent flushing of a long tail of
// tiny segments:
- final double ramBufferSizeMB = indexWriterConfig.getRAMBufferSizeMB();
+ final double ramBufferSizeMB = config.getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
flushControl.getDeleteBytesUsed() > (1024*1024*ramBufferSizeMB/2)) {
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "force apply deletes bytesUsed=" + flushControl.getDeleteBytesUsed() + " vs ramBuffer=" + (1024*1024*ramBufferSizeMB));
}
- applyAllDeletes(deleteQueue);
+ hasEvents = true;
+ if (!this.applyAllDeletes(deleteQueue)) {
+ putEvent(ApplyDeletesEvent.INSTANCE);
+ }
}
- return maybeMerge;
+ return hasEvents;
}
-
- void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes)
- throws IOException {
- // Finish the flushed segment and publish it to IndexWriter
- if (newSegment == null) {
- assert bufferedDeletes != null;
- if (bufferedDeletes != null && bufferedDeletes.any()) {
- indexWriter.publishFrozenDeletes(bufferedDeletes);
- if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", "flush: push buffered deletes: " + bufferedDeletes);
- }
- }
- } else {
- publishFlushedSegment(newSegment, bufferedDeletes);
- }
- }
-
final void subtractFlushedNumDocs(int numFlushed) {
int oldValue = numDocsInRAM.get();
while (!numDocsInRAM.compareAndSet(oldValue, oldValue - numFlushed)) {
@@ -551,29 +568,6 @@ final class DocumentsWriter {
}
}
- /**
- * Publishes the flushed segment, segment private deletes (if any) and its
- * associated global delete (if present) to IndexWriter. The actual
- * publishing operation is synced on IW -> BDS so that the {@link SegmentInfo}'s
- * delete generation is always GlobalPacket_deleteGeneration + 1
- */
- private void publishFlushedSegment(FlushedSegment newSegment, FrozenBufferedDeletes globalPacket)
- throws IOException {
- assert newSegment != null;
- assert newSegment.segmentInfo != null;
- final FrozenBufferedDeletes segmentDeletes = newSegment.segmentDeletes;
- //System.out.println("FLUSH: " + newSegment.segmentInfo.info.name);
- if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", "publishFlushedSegment seg-private deletes=" + segmentDeletes);
- }
-
- if (segmentDeletes != null && infoStream.isEnabled("DW")) {
- infoStream.message("DW", "flush: push buffered seg private deletes: " + segmentDeletes);
- }
- // now publish!
- indexWriter.publishFlushedSegment(newSegment.segmentInfo, segmentDeletes, globalPacket);
- }
-
// for asserts
private volatile DocumentsWriterDeleteQueue currentFullFlushDelQueue = null;
@@ -588,7 +582,7 @@ final class DocumentsWriter {
* two stage operation; the caller must ensure (in try/finally) that finishFlush
* is called after this method, to release the flush lock in DWFlushControl
*/
- final boolean flushAllThreads()
+ final boolean flushAllThreads(final IndexWriter indexWriter)
throws IOException {
final DocumentsWriterDeleteQueue flushingDeleteQueue;
if (infoStream.isEnabled("DW")) {
@@ -620,10 +614,9 @@ final class DocumentsWriter {
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", Thread.currentThread().getName() + ": flush naked frozen global deletes");
}
- ticketQueue.addDeletesAndPurge(this, flushingDeleteQueue);
- } else {
- ticketQueue.forcePurge(this);
- }
+ ticketQueue.addDeletes(flushingDeleteQueue);
+ }
+ ticketQueue.forcePurge(indexWriter);
assert !flushingDeleteQueue.anyChanges() && !ticketQueue.hasTickets();
} finally {
assert flushingDeleteQueue == currentFullFlushDelQueue;
@@ -641,11 +634,94 @@ final class DocumentsWriter {
// Release the flush lock
flushControl.finishFullFlush();
} else {
- flushControl.abortFullFlushes();
+ Set<String> newFilesSet = new HashSet<>();
+ flushControl.abortFullFlushes(newFilesSet);
+ putEvent(new DeleteNewFilesEvent(newFilesSet));
+
}
} finally {
pendingChangesInCurrentFullFlush = false;
}
}
+
+ public LiveIndexWriterConfig getIndexWriterConfig() {
+ return config;
+ }
+
+ private void putEvent(Event event) {
+ events.add(event);
+ }
+
+ static final class ApplyDeletesEvent implements Event {
+ static final Event INSTANCE = new ApplyDeletesEvent();
+ private int instCount = 0;
+ private ApplyDeletesEvent() {
+ assert instCount == 0;
+ instCount++;
+ }
+
+ @Override
+ public void process(IndexWriter writer, boolean triggerMerge, boolean forcePurge) throws IOException {
+ writer.applyDeletesAndPurge(true); // we always purge!
+ }
+ }
+
+ static final class MergePendingEvent implements Event {
+ static final Event INSTANCE = new MergePendingEvent();
+ private int instCount = 0;
+ private MergePendingEvent() {
+ assert instCount == 0;
+ instCount++;
+ }
+
+ @Override
+ public void process(IndexWriter writer, boolean triggerMerge, boolean forcePurge) throws IOException {
+ writer.doAfterSegmentFlushed(triggerMerge, forcePurge);
+ }
+ }
+
+ static final class ForcedPurgeEvent implements Event {
+ static final Event INSTANCE = new ForcedPurgeEvent();
+ private int instCount = 0;
+ private ForcedPurgeEvent() {
+ assert instCount == 0;
+ instCount++;
+ }
+
+ @Override
+ public void process(IndexWriter writer, boolean triggerMerge, boolean forcePurge) throws IOException {
+ writer.purge(true);
+ }
+ }
+
+ static class FlushFailedEvent implements Event {
+ private final SegmentInfo info;
+
+ public FlushFailedEvent(SegmentInfo info) {
+ this.info = info;
+ }
+
+ @Override
+ public void process(IndexWriter writer, boolean triggerMerge, boolean forcePurge) throws IOException {
+ writer.flushFailed(info);
+ }
+ }
+
+ static class DeleteNewFilesEvent implements Event {
+ private final Collection<String> files;
+
+ public DeleteNewFilesEvent(Collection<String> files) {
+ this.files = files;
+ }
+
+ @Override
+ public void process(IndexWriter writer, boolean triggerMerge, boolean forcePurge) throws IOException {
+ writer.deleteNewFiles(files);
+ }
+ }
+
+ public Queue<Event> eventQueue() {
+ return events;
+ }
}
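
The refactor above has DocumentsWriter queue Event objects instead of calling back
into IndexWriter while holding its own locks. A hedged sketch of how a caller might
drain the queue (Event.process and eventQueue() come from this diff; the loop itself
is an assumption):

    Queue<Event> events = documentsWriter.eventQueue();
    Event event;
    while ((event = events.poll()) != null) {
      // Each event calls back into IndexWriter outside DocumentsWriter's locks:
      event.process(writer, true /* triggerMerge */, false /* forcePurge */);
    }
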
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java Fri Aug 30 15:06:42 2013
@@ -23,9 +23,11 @@ import java.util.List;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
+import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
+import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.ThreadInterruptedException;
/**
@@ -66,14 +68,18 @@ final class DocumentsWriterFlushControl
private boolean closed = false;
private final DocumentsWriter documentsWriter;
private final LiveIndexWriterConfig config;
+ private final BufferedDeletesStream bufferedDeletesStream;
+ private final InfoStream infoStream;
- DocumentsWriterFlushControl(DocumentsWriter documentsWriter, LiveIndexWriterConfig config) {
+ DocumentsWriterFlushControl(DocumentsWriter documentsWriter, LiveIndexWriterConfig config, BufferedDeletesStream bufferedDeletesStream) {
+ this.infoStream = config.getInfoStream();
this.stallControl = new DocumentsWriterStallControl();
this.perThreadPool = documentsWriter.perThreadPool;
this.flushPolicy = documentsWriter.flushPolicy;
- this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
this.config = config;
+ this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
this.documentsWriter = documentsWriter;
+ this.bufferedDeletesStream = bufferedDeletesStream;
}
public synchronized long activeBytes() {
@@ -240,7 +246,6 @@ final class DocumentsWriterFlushControl
}
public synchronized void waitForFlush() {
- assert !Thread.holdsLock(this.documentsWriter.indexWriter) : "IW lock should never be hold when waiting on flush";
while (flushingWriters.size() != 0) {
try {
this.wait();
@@ -277,7 +282,7 @@ final class DocumentsWriterFlushControl
}
assert assertMemory();
// Take it out of the loop this DWPT is stale
- perThreadPool.replaceForFlush(state, closed);
+ perThreadPool.reset(state, closed);
} finally {
updateStallState();
}
@@ -295,7 +300,7 @@ final class DocumentsWriterFlushControl
assert fullFlush : "can not block if fullFlush == false";
final DocumentsWriterPerThread dwpt;
final long bytes = perThread.bytesUsed;
- dwpt = perThreadPool.replaceForFlush(perThread, closed);
+ dwpt = perThreadPool.reset(perThread, closed);
numPending--;
blockedFlushes.add(new BlockedFlush(dwpt, bytes));
}finally {
@@ -311,12 +316,12 @@ final class DocumentsWriterFlushControl
// We are pending so all memory is already moved to flushBytes
if (perThread.tryLock()) {
try {
- if (perThread.isActive()) {
+ if (perThread.isInitialized()) {
assert perThread.isHeldByCurrentThread();
final DocumentsWriterPerThread dwpt;
final long bytes = perThread.bytesUsed; // do that before
// replace!
- dwpt = perThreadPool.replaceForFlush(perThread, closed);
+ dwpt = perThreadPool.reset(perThread, closed);
assert !flushingWriters.containsKey(dwpt) : "DWPT is already flushing";
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
flushingWriters.put(dwpt, Long.valueOf(bytes));
@@ -413,11 +418,11 @@ final class DocumentsWriterFlushControl
* Returns the number of delete terms in the global pool
*/
public int getNumGlobalTermDeletes() {
- return documentsWriter.deleteQueue.numGlobalTermDeletes() + documentsWriter.indexWriter.bufferedDeletesStream.numTerms();
+ return documentsWriter.deleteQueue.numGlobalTermDeletes() + bufferedDeletesStream.numTerms();
}
public long getDeleteBytesUsed() {
- return documentsWriter.deleteQueue.bytesUsed() + documentsWriter.indexWriter.bufferedDeletesStream.bytesUsed();
+ return documentsWriter.deleteQueue.bytesUsed() + bufferedDeletesStream.bytesUsed();
}
synchronized int numFlushingDWPT() {
@@ -441,7 +446,7 @@ final class DocumentsWriterFlushControl
.currentThread(), documentsWriter);
boolean success = false;
try {
- if (perThread.isActive()
+ if (perThread.isInitialized()
&& perThread.dwpt.deleteQueue != documentsWriter.deleteQueue) {
// There is a flush-all in process and this DWPT is
// now stale -- enroll it for flush and try for
@@ -475,7 +480,10 @@ final class DocumentsWriterFlushControl
final ThreadState next = perThreadPool.getThreadState(i);
next.lock();
try {
- if (!next.isActive()) {
+ if (!next.isInitialized()) {
+ if (closed && next.isActive()) {
+ perThreadPool.deactivateThreadState(next);
+ }
continue;
}
assert next.dwpt.deleteQueue == flushingQueue
@@ -515,7 +523,7 @@ final class DocumentsWriterFlushControl
final ThreadState next = perThreadPool.getThreadState(i);
next.lock();
try {
- assert !next.isActive() || next.dwpt.deleteQueue == queue;
+ assert !next.isInitialized() || next.dwpt.deleteQueue == queue : "isInitialized: " + next.isInitialized() + " numDocs: " + (next.isInitialized() ? next.dwpt.getNumDocsInRAM() : 0) ;
} finally {
next.unlock();
}
@@ -526,12 +534,12 @@ final class DocumentsWriterFlushControl
private final List<DocumentsWriterPerThread> fullFlushBuffer = new ArrayList<DocumentsWriterPerThread>();
void addFlushableState(ThreadState perThread) {
- if (documentsWriter.infoStream.isEnabled("DWFC")) {
- documentsWriter.infoStream.message("DWFC", "addFlushableState " + perThread.dwpt);
+ if (infoStream.isEnabled("DWFC")) {
+ infoStream.message("DWFC", "addFlushableState " + perThread.dwpt);
}
final DocumentsWriterPerThread dwpt = perThread.dwpt;
assert perThread.isHeldByCurrentThread();
- assert perThread.isActive();
+ assert perThread.isInitialized();
assert fullFlush;
assert dwpt.deleteQueue != documentsWriter.deleteQueue;
if (dwpt.getNumDocsInRAM() > 0) {
@@ -545,11 +553,7 @@ final class DocumentsWriterFlushControl
fullFlushBuffer.add(flushingDWPT);
}
} else {
- if (closed) {
- perThreadPool.deactivateThreadState(perThread); // make this state inactive
- } else {
- perThreadPool.reinitThreadState(perThread);
- }
+ perThreadPool.reset(perThread, closed); // make this state inactive
}
}
@@ -594,19 +598,20 @@ final class DocumentsWriterFlushControl
return true;
}
- synchronized void abortFullFlushes() {
+ synchronized void abortFullFlushes(Set<String> newFiles) {
try {
- abortPendingFlushes();
+ abortPendingFlushes(newFiles);
} finally {
fullFlush = false;
}
}
- synchronized void abortPendingFlushes() {
+ synchronized void abortPendingFlushes(Set<String> newFiles) {
try {
for (DocumentsWriterPerThread dwpt : flushQueue) {
try {
- dwpt.abort();
+ documentsWriter.subtractFlushedNumDocs(dwpt.getNumDocsInRAM());
+ dwpt.abort(newFiles);
} catch (Throwable ex) {
// ignore - keep on aborting the flush queue
} finally {
@@ -617,7 +622,8 @@ final class DocumentsWriterFlushControl
try {
flushingWriters
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
- blockedFlush.dwpt.abort();
+ documentsWriter.subtractFlushedNumDocs(blockedFlush.dwpt.getNumDocsInRAM());
+ blockedFlush.dwpt.abort(newFiles);
} catch (Throwable ex) {
// ignore - keep on aborting the blocked queue
} finally {
@@ -670,8 +676,8 @@ final class DocumentsWriterFlushControl
* checked out DWPT are available
*/
void waitIfStalled() {
- if (documentsWriter.infoStream.isEnabled("DWFC")) {
- documentsWriter.infoStream.message("DWFC",
+ if (infoStream.isEnabled("DWFC")) {
+ infoStream.message("DWFC",
"waitIfStalled: numFlushesPending: " + flushQueue.size()
+ " netBytes: " + netBytes() + " flushBytes: " + flushBytes()
+ " fullFlush: " + fullFlush);
@@ -686,5 +692,12 @@ final class DocumentsWriterFlushControl
return stallControl.anyStalledThreads();
}
+ /**
+ * Returns the {@link IndexWriter} {@link InfoStream}
+ */
+ public InfoStream getInfoStream() {
+ return infoStream;
+ }
+
}
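Taken together, the DocumentsWriterFlushControl hunks above replace every documentsWriter.indexWriter.* and documentsWriter.infoStream reach-through with fields the flush control now owns (infoStream, bufferedDeletesStream), which is also what makes the new getInfoStream() accessor possible. A hedged sketch of the wiring this implies; the constructor itself is not part of this diff, only the field names are:

    // Hypothetical constructor injection consistent with the hunks above.
    DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
                                InfoStream infoStream,
                                BufferedDeletesStream bufferedDeletesStream) {
      this.documentsWriter = documentsWriter;
      this.infoStream = infoStream;                       // addFlushableState, waitIfStalled
      this.bufferedDeletesStream = bufferedDeletesStream; // getNumGlobalTermDeletes, getDeleteBytesUsed
    }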
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java Fri Aug 30 15:06:42 2013
@@ -34,8 +34,7 @@ class DocumentsWriterFlushQueue {
private final AtomicInteger ticketCount = new AtomicInteger();
private final ReentrantLock purgeLock = new ReentrantLock();
- void addDeletesAndPurge(DocumentsWriter writer,
- DocumentsWriterDeleteQueue deleteQueue) throws IOException {
+ void addDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException {
synchronized (this) {
incTickets();// first inc the ticket count - freeze opens
// a window for #anyChanges to fail
@@ -49,9 +48,6 @@ class DocumentsWriterFlushQueue {
}
}
}
- // don't hold the lock on the FlushQueue when forcing the purge - this blocks and deadlocks
- // if we hold the lock.
- forcePurge(writer);
}
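The three deleted comment lines record why the purge had to leave this method: publishing tickets locks IndexWriter, and doing that while holding (or while callers hold) the flush-queue monitor is exactly the deadlock the old comment warns about. Under the new split the caller freezes deletes first, then purges with no monitors held. A hedged sketch of such a caller, which is not itself part of this diff:

    // Illustrative caller only: addDeletes synchronizes internally,
    // forcePurge asserts that neither monitor is held.
    void applyAllDeletes(IndexWriter writer, DocumentsWriterDeleteQueue deleteQueue)
        throws IOException {
      ticketQueue.addDeletes(deleteQueue);
      ticketQueue.forcePurge(writer);
    }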
private void incTickets() {
@@ -98,8 +94,9 @@ class DocumentsWriterFlushQueue {
return ticketCount.get() != 0;
}
- private void innerPurge(DocumentsWriter writer) throws IOException {
+ private int innerPurge(IndexWriter writer) throws IOException {
assert purgeLock.isHeldByCurrentThread();
+ int numPurged = 0;
while (true) {
final FlushTicket head;
final boolean canPublish;
@@ -108,6 +105,7 @@ class DocumentsWriterFlushQueue {
canPublish = head != null && head.canPublish(); // do this synced
}
if (canPublish) {
+ numPurged++;
try {
/*
* if we block on publish -> lock IW -> lock BufferedDeletes we don't block
@@ -116,6 +114,7 @@ class DocumentsWriterFlushQueue {
* be a ticket still in the queue.
*/
head.publish(writer);
+
} finally {
synchronized (this) {
// finally remove the published ticket from the queue
@@ -128,27 +127,31 @@ class DocumentsWriterFlushQueue {
break;
}
}
+ return numPurged;
}
- void forcePurge(DocumentsWriter writer) throws IOException {
+ int forcePurge(IndexWriter writer) throws IOException {
assert !Thread.holdsLock(this);
+ assert !Thread.holdsLock(writer);
purgeLock.lock();
try {
- innerPurge(writer);
+ return innerPurge(writer);
} finally {
purgeLock.unlock();
}
}
- void tryPurge(DocumentsWriter writer) throws IOException {
+ int tryPurge(IndexWriter writer) throws IOException {
assert !Thread.holdsLock(this);
+ assert !Thread.holdsLock(writer);
if (purgeLock.tryLock()) {
try {
- innerPurge(writer);
+ return innerPurge(writer);
} finally {
purgeLock.unlock();
}
}
+ return 0;
}
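forcePurge and tryPurge now report how many tickets they published and assert that the caller holds neither the queue monitor nor the IndexWriter lock; the only difference between them is lock() versus tryLock(). A standalone sketch of that pattern, stripped of the Lucene types:

    import java.util.concurrent.locks.ReentrantLock;

    class PurgeGate {
      private final ReentrantLock purgeLock = new ReentrantLock();

      int forcePurge() {              // blocking variant: always drains
        purgeLock.lock();
        try { return drain(); } finally { purgeLock.unlock(); }
      }

      int tryPurge() {                // opportunistic variant: never blocks
        if (purgeLock.tryLock()) {
          try { return drain(); } finally { purgeLock.unlock(); }
        }
        return 0;                     // current holder will publish our tickets too
      }

      private int drain() { return 0; } // stub: publish queued tickets head-first
    }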
public int getTicketCount() {
@@ -169,8 +172,47 @@ class DocumentsWriterFlushQueue {
this.frozenDeletes = frozenDeletes;
}
- protected abstract void publish(DocumentsWriter writer) throws IOException;
+ protected abstract void publish(IndexWriter writer) throws IOException;
protected abstract boolean canPublish();
+
+ /**
+ * Publishes the flushed segment, segment private deletes (if any) and its
+ * associated global delete (if present) to IndexWriter. The actual
+ * publishing operation is synced on IW -> BDS so that the {@link SegmentInfo}'s
+ * delete generation is always GlobalPacket_deleteGeneration + 1
+ */
+ protected final void publishFlushedSegment(IndexWriter indexWriter, FlushedSegment newSegment, FrozenBufferedDeletes globalPacket)
+ throws IOException {
+ assert newSegment != null;
+ assert newSegment.segmentInfo != null;
+ final FrozenBufferedDeletes segmentDeletes = newSegment.segmentDeletes;
+ //System.out.println("FLUSH: " + newSegment.segmentInfo.info.name);
+ if (indexWriter.infoStream.isEnabled("DW")) {
+ indexWriter.infoStream.message("DW", "publishFlushedSegment seg-private deletes=" + segmentDeletes);
+ }
+
+ if (segmentDeletes != null && indexWriter.infoStream.isEnabled("DW")) {
+ indexWriter.infoStream.message("DW", "flush: push buffered seg private deletes: " + segmentDeletes);
+ }
+ // now publish!
+ indexWriter.publishFlushedSegment(newSegment.segmentInfo, segmentDeletes, globalPacket);
+ }
+
+ protected final void finishFlush(IndexWriter indexWriter, FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes)
+ throws IOException {
+ // Finish the flushed segment and publish it to IndexWriter
+ if (newSegment == null) {
+ assert bufferedDeletes != null;
+ if (bufferedDeletes != null && bufferedDeletes.any()) {
+ indexWriter.publishFrozenDeletes(bufferedDeletes);
+ if (indexWriter.infoStream.isEnabled("DW")) {
+ indexWriter.infoStream.message("DW", "flush: push buffered deletes: " + bufferedDeletes);
+ }
+ }
+ } else {
+ publishFlushedSegment(indexWriter, newSegment, bufferedDeletes);
+ }
+ }
}
static final class GlobalDeletesTicket extends FlushTicket {
@@ -179,11 +221,11 @@ class DocumentsWriterFlushQueue {
super(frozenDeletes);
}
@Override
- protected void publish(DocumentsWriter writer) throws IOException {
+ protected void publish(IndexWriter writer) throws IOException {
assert !published : "ticket was already published - cannot publish twice";
published = true;
// it's a global ticket - no segment to publish
- writer.finishFlush(null, frozenDeletes);
+ finishFlush(writer, null, frozenDeletes);
}
@Override
@@ -201,10 +243,10 @@ class DocumentsWriterFlushQueue {
}
@Override
- protected void publish(DocumentsWriter writer) throws IOException {
+ protected void publish(IndexWriter writer) throws IOException {
assert !published : "ticket was already published - cannot publish twice";
published = true;
- writer.finishFlush(segment, frozenDeletes);
+ finishFlush(writer, segment, frozenDeletes);
}
protected void setSegment(FlushedSegment segment) {
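With publish() retargeted from DocumentsWriter to IndexWriter, each ticket now carries everything it needs: a GlobalDeletesTicket pushes only its frozen deletes, while a SegmentFlushTicket hands its FlushedSegment plus the global packet to publishFlushedSegment, preserving the delete-generation ordering described in the javadoc above. An illustrative purge loop over these tickets; the helper names are hypothetical:

    FlushTicket ticket;
    while ((ticket = peekPublishableTicket()) != null) { // hypothetical helper
      ticket.publish(indexWriter); // GlobalDeletesTicket: frozen deletes only
                                   // SegmentFlushTicket: segment + global packet
      removeTicket(ticket);        // hypothetical; removed under the queue monitor
    }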
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1518989&r1=1518988&r2=1518989&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Fri Aug 30 15:06:42 2013
@@ -22,6 +22,7 @@ import java.text.NumberFormat;
import java.util.Collection;
import java.util.HashSet;
import java.util.Locale;
+import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
@@ -144,7 +145,7 @@ class DocumentsWriterPerThread {
* updating the index files) and must discard all
* currently buffered docs. This resets our state,
* discarding any docs added since last flush. */
- void abort() {
+ void abort(Set<String> createdFiles) {
//System.out.println(Thread.currentThread().getName() + ": now abort seg=" + segmentInfo.name);
hasAborted = aborting = true;
try {
@@ -157,10 +158,7 @@ class DocumentsWriterPerThread {
}
pendingDeletes.clear();
- deleteSlice = deleteQueue.newSlice();
- // Reset all postings data
- doAfterFlush();
-
+ createdFiles.addAll(directory.getCreatedFiles());
} finally {
aborting = false;
if (infoStream.isEnabled("DWPT")) {
@@ -169,83 +167,77 @@ class DocumentsWriterPerThread {
}
}
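abort() no longer rebuilds the DWPT for reuse - the newSlice()/doAfterFlush() reset is gone - it only rolls back buffered state and reports every file it created into the caller's set, because an aborted DWPT is now simply discarded. A condensed view of the new contract; the elided middle of the method (dropping in-flight postings) is assumed, the rest is from the hunk:

    void abort(Set<String> createdFiles) {
      hasAborted = aborting = true;
      try {
        // ... elided: abort the consumer chain, drop buffered postings ...
        pendingDeletes.clear();                           // forget buffered deletes
        createdFiles.addAll(directory.getCreatedFiles()); // caller owns cleanup now
      } finally {
        aborting = false;
      }
    }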
private final static boolean INFO_VERBOSE = false;
- final DocumentsWriter parent;
final Codec codec;
- final IndexWriter writer;
final TrackingDirectoryWrapper directory;
final Directory directoryOrig;
final DocState docState;
final DocConsumer consumer;
final Counter bytesUsed;
- SegmentWriteState flushState;
// Deletes for our still-in-RAM (to be flushed next) segment
- BufferedDeletes pendingDeletes;
- SegmentInfo segmentInfo; // Current segment we are working on
+ final BufferedDeletes pendingDeletes;
+ private final SegmentInfo segmentInfo; // Current segment we are working on
boolean aborting = false; // True if an abort is pending
boolean hasAborted = false; // True if the last exception thrown by #updateDocument was aborting
private FieldInfos.Builder fieldInfos;
private final InfoStream infoStream;
private int numDocsInRAM;
- private int flushedDocCount;
- DocumentsWriterDeleteQueue deleteQueue;
- DeleteSlice deleteSlice;
+ final DocumentsWriterDeleteQueue deleteQueue;
+ private final DeleteSlice deleteSlice;
private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
final Allocator byteBlockAllocator;
final IntBlockPool.Allocator intBlockAllocator;
private final LiveIndexWriterConfig indexWriterConfig;
- public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent,
- FieldInfos.Builder fieldInfos, IndexingChain indexingChain) {
+ public DocumentsWriterPerThread(String segmentName, Directory directory, LiveIndexWriterConfig indexWriterConfig, InfoStream infoStream, DocumentsWriterDeleteQueue deleteQueue,
+ FieldInfos.Builder fieldInfos) {
this.directoryOrig = directory;
this.directory = new TrackingDirectoryWrapper(directory);
- this.parent = parent;
this.fieldInfos = fieldInfos;
- this.writer = parent.indexWriter;
- this.indexWriterConfig = parent.indexWriterConfig;
- this.infoStream = parent.infoStream;
- this.codec = parent.codec;
+ this.indexWriterConfig = indexWriterConfig;
+ this.infoStream = infoStream;
+ this.codec = indexWriterConfig.getCodec();
this.docState = new DocState(this, infoStream);
- this.docState.similarity = parent.indexWriter.getConfig().getSimilarity();
+ this.docState.similarity = indexWriterConfig.getSimilarity();
bytesUsed = Counter.newCounter();
byteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
pendingDeletes = new BufferedDeletes();
intBlockAllocator = new IntBlockAllocator(bytesUsed);
- initialize();
+ this.deleteQueue = deleteQueue;
+ assert numDocsInRAM == 0 : "num docs " + numDocsInRAM;
+ pendingDeletes.clear();
+ deleteSlice = deleteQueue.newSlice();
+
+ segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segmentName, -1,
+ false, codec, null, null);
+ assert numDocsInRAM == 0;
+ if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
+ infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);
+ }
// this should be the last call in the ctor
// it really sucks that we need to pull this within the ctor and pass this ref to the chain!
- consumer = indexingChain.getChain(this);
- }
-
- public DocumentsWriterPerThread(DocumentsWriterPerThread other, FieldInfos.Builder fieldInfos) {
- this(other.directoryOrig, other.parent, fieldInfos, other.parent.chain);
+ consumer = indexWriterConfig.getIndexingChain().getChain(this);
+
}
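The rewritten constructor makes the per-thread writer single-use: deleteQueue, deleteSlice and segmentInfo are assigned exactly once, so a flushed or aborted DWPT is replaced rather than reinitialized through the removed initialize()/initSegmentInfo(). A hedged sketch of construction under the new signature; the pool-side caller shown here is an assumption, the parameters and their order are from the diff:

    DocumentsWriterPerThread newWriter() {
      return new DocumentsWriterPerThread(
          indexWriter.newSegmentName(),   // name chosen up front, not lazily
          directoryOrig,                  // DWPT wraps it for file tracking
          indexWriterConfig,
          infoStream,
          deleteQueue,                    // shared queue, now final in the DWPT
          new FieldInfos.Builder(globalFieldNumbers));
    }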
- void initialize() {
- deleteQueue = parent.deleteQueue;
- assert numDocsInRAM == 0 : "num docs " + numDocsInRAM;
- pendingDeletes.clear();
- deleteSlice = null;
- }
-
void setAborting() {
aborting = true;
}
+
+ boolean checkAndResetHasAborted() {
+ final boolean retval = hasAborted;
+ hasAborted = false;
+ return retval;
+ }
final boolean testPoint(String message) {
if (infoStream.isEnabled("TP")) {
- infoStream.message("TP", message);
+ infoStream.message("TP", message);
}
return true;
}
-
- boolean checkAndResetHasAborted() {
- final boolean retval = hasAborted;
- hasAborted = false;
- return retval;
- }
public void updateDocument(IndexDocument doc, Analyzer analyzer, Term delTerm) throws IOException {
assert testPoint("DocumentsWriterPerThread addDocument start");
@@ -253,9 +245,6 @@ class DocumentsWriterPerThread {
docState.doc = doc;
docState.analyzer = analyzer;
docState.docID = numDocsInRAM;
- if (segmentInfo == null) {
- initSegmentInfo();
- }
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
}
@@ -274,7 +263,7 @@ class DocumentsWriterPerThread {
deleteDocID(docState.docID);
numDocsInRAM++;
} else {
- abort();
+ abort(filesToDelete);
}
}
}
@@ -284,29 +273,16 @@ class DocumentsWriterPerThread {
success = true;
} finally {
if (!success) {
- abort();
+ abort(filesToDelete);
}
}
finishDocument(delTerm);
}
- private void initSegmentInfo() {
- String segment = writer.newSegmentName();
- segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segment, -1,
- false, codec, null, null);
- assert numDocsInRAM == 0;
- if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
- infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segment + " delQueue=" + deleteQueue);
- }
- }
-
public int updateDocuments(Iterable<? extends IndexDocument> docs, Analyzer analyzer, Term delTerm) throws IOException {
assert testPoint("DocumentsWriterPerThread addDocuments start");
assert deleteQueue != null;
docState.analyzer = analyzer;
- if (segmentInfo == null) {
- initSegmentInfo();
- }
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
}
@@ -331,7 +307,7 @@ class DocumentsWriterPerThread {
// be called (because an exc is being thrown):
numDocsInRAM++;
} else {
- abort();
+ abort(filesToDelete);
}
}
}
@@ -341,7 +317,7 @@ class DocumentsWriterPerThread {
success = true;
} finally {
if (!success) {
- abort();
+ abort(filesToDelete);
}
}
@@ -384,21 +360,18 @@ class DocumentsWriterPerThread {
* the updated slice we get from 1. holds all the deletes that have occurred
* since we updated the slice the last time.
*/
- if (deleteSlice == null) {
- deleteSlice = deleteQueue.newSlice();
- if (delTerm != null) {
- deleteQueue.add(delTerm, deleteSlice);
- deleteSlice.reset();
- }
-
- } else {
- if (delTerm != null) {
- deleteQueue.add(delTerm, deleteSlice);
- assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
- deleteSlice.apply(pendingDeletes, numDocsInRAM);
- } else if (deleteQueue.updateSlice(deleteSlice)) {
- deleteSlice.apply(pendingDeletes, numDocsInRAM);
- }
+ boolean applySlice = numDocsInRAM != 0;
+ if (delTerm != null) {
+ deleteQueue.add(delTerm, deleteSlice);
+ assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
+ } else {
+ applySlice &= deleteQueue.updateSlice(deleteSlice);
+ }
+
+ if (applySlice) {
+ deleteSlice.apply(pendingDeletes, numDocsInRAM);
+ } else { // if we don't need to apply we must reset!
+ deleteSlice.reset();
}
++numDocsInRAM;
}
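The rewritten slice handling collapses the old null-checked branches into one boolean: there is nothing to apply deletes to before the first document, a delTerm is always appended to the slice, and otherwise the slice is only applied when the queue actually advanced it; whenever it is not applied it must be reset, or stale entries would hit later docIDs. The decision table the new code implements:

    // numDocsInRAM  delTerm    updateSlice()   action
    // ------------  --------   -------------   --------------------------------
    //      0          any          any         reset() - nothing to apply to yet
    //     >0        non-null        -          apply up to numDocsInRAM
    //     >0          null         true        apply - queue had new deletes
    //     >0          null         false       reset() - slice already current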
@@ -434,14 +407,6 @@ class DocumentsWriterPerThread {
return numDocsInRAM;
}
- /** Reset after a flush */
- private void doAfterFlush() {
- segmentInfo = null;
- directory.getCreatedFiles().clear();
- fieldInfos = new FieldInfos.Builder(fieldInfos.globalFieldNumbers);
- parent.subtractFlushedNumDocs(numDocsInRAM);
- numDocsInRAM = 0;
- }
/**
* Prepares this DWPT for flushing. This method will freeze and return the
@@ -457,7 +422,7 @@ class DocumentsWriterPerThread {
// apply all deletes before we flush and release the delete slice
deleteSlice.apply(pendingDeletes, numDocsInRAM);
assert deleteSlice.isEmpty();
- deleteSlice = null;
+ deleteSlice.reset();
}
return globalDeletes;
}
@@ -465,11 +430,11 @@ class DocumentsWriterPerThread {
/** Flush all pending docs to a new segment */
FlushedSegment flush() throws IOException {
assert numDocsInRAM > 0;
- assert deleteSlice == null : "all deletes must be applied in prepareFlush";
+ assert deleteSlice.isEmpty() : "all deletes must be applied in prepareFlush";
segmentInfo.setDocCount(numDocsInRAM);
- flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(),
+ final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(),
pendingDeletes, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())));
- final double startMBUsed = parent.flushControl.netBytes() / 1024. / 1024.;
+ final double startMBUsed = bytesUsed() / 1024. / 1024.;
// Apply delete-by-docID now (delete-byDocID only
// happens when an exception is hit processing that
@@ -515,15 +480,11 @@ class DocumentsWriterPerThread {
infoStream.message("DWPT", "flushed codec=" + codec);
}
- flushedDocCount += flushState.segmentInfo.getDocCount();
-
final BufferedDeletes segmentDeletes;
if (pendingDeletes.queries.isEmpty()) {
- pendingDeletes.clear();
segmentDeletes = null;
} else {
segmentDeletes = pendingDeletes;
- pendingDeletes = new BufferedDeletes();
}
if (infoStream.isEnabled("DWPT")) {
@@ -531,7 +492,7 @@ class DocumentsWriterPerThread {
infoStream.message("DWPT", "flushed: segment=" + segmentInfo.name +
" ramUsed=" + nf.format(startMBUsed) + " MB" +
" newFlushedSize(includes docstores)=" + nf.format(newSegmentSize) + " MB" +
- " docs/MB=" + nf.format(flushedDocCount / newSegmentSize));
+ " docs/MB=" + nf.format(flushState.segmentInfo.getDocCount() / newSegmentSize));
}
assert segmentInfo != null;
@@ -539,20 +500,21 @@ class DocumentsWriterPerThread {
FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos,
segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush);
sealFlushedSegment(fs);
- doAfterFlush();
success = true;
return fs;
} finally {
if (!success) {
- if (segmentInfo != null) {
- writer.flushFailed(segmentInfo);
- }
- abort();
+ abort(filesToDelete);
}
}
}
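Dropping flushedDocCount also fixes the flushed-stats line: docs/MB is now a property of the segment being flushed rather than an accumulating ratio. Worked through with round numbers:

    // Old: flushedDocCount accumulated across flushes of a reused DWPT, so a
    // second 1000-doc, 10 MB flush printed docs/MB = 2000 / 10 = 200.
    // New: flushState.segmentInfo.getDocCount() / newSegmentSize
    //      = 1000 / 10 = 100 docs/MB for that same flush.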
+ private final Set<String> filesToDelete = new HashSet<String>();
+
+ public Set<String> pendingFilesToDelete() {
+ return filesToDelete;
+ }
/**
* Seals the {@link SegmentInfo} for the new flushed segment and persists
* the deleted documents {@link MutableBits}.
@@ -568,12 +530,10 @@ class DocumentsWriterPerThread {
boolean success = false;
try {
+
if (indexWriterConfig.getUseCompoundFile()) {
-
- // Now build compound file
- Collection<String> oldFiles = IndexWriter.createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context);
+ filesToDelete.addAll(IndexWriter.createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context));
newSegment.info.setUseCompoundFile(true);
- writer.deleteNewFiles(oldFiles);
}
// Have codec write SegmentInfo. Must do this after
@@ -618,7 +578,6 @@ class DocumentsWriterPerThread {
infoStream.message("DWPT", "hit exception " +
"reating compound file for newly flushed segment " + newSegment.info.name);
}
- writer.flushFailed(newSegment.info);
}
}
}
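sealFlushedSegment no longer calls back into IndexWriter to delete the pre-CFS files or to report a failed flush; the files superseded by the compound file are parked in the DWPT-local filesToDelete set until IndexWriter owns the segment. A hedged sketch of the consumer side; the getter is from this diff, the call site is an assumption:

    // Illustrative consumer: IndexWriter, not the DWPT, decides when the
    // superseded files may actually be removed.
    Set<String> pending = dwpt.pendingFilesToDelete();
    if (!pending.isEmpty()) {
      indexWriter.deleteNewFiles(pending); // deleteNewFiles appears in this diff
    }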
@@ -671,4 +630,5 @@ class DocumentsWriterPerThread {
+ ", segment=" + (segmentInfo != null ? segmentInfo.name : "null") + ", aborting=" + aborting + ", numDocsInRAM="
+ numDocsInRAM + ", deleteQueue=" + deleteQueue + "]";
}
+
}