Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/09 15:19:39 UTC
svn commit: r1101016 [3/10] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/idea/solr/ dev-tools/maven/lucene/contrib/ant/
dev-tools/maven/lucene/contrib/db/bdb-je/ dev-tools/maven/luce...
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java Mon May 9 13:19:28 2011
@@ -22,9 +22,6 @@ import java.util.Collection;
import java.util.Map;
abstract class DocFieldConsumer {
-
- FieldInfos fieldInfos;
-
/** Called when DocumentsWriter decides to create a new
* segment */
abstract void flush(Map<DocFieldConsumerPerThread,Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
@@ -39,8 +36,4 @@ abstract class DocFieldConsumer {
* The consumer should free RAM, if possible, returning
* true if any RAM was in fact freed. */
abstract boolean freeRAM();
-
- void setFieldInfos(FieldInfos fieldInfos) {
- this.fieldInfos = fieldInfos;
}
-}
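
This hunk removes the setter-injected FieldInfos from the consumer chain; the hunks below instead pass the current per-segment FieldInfos with each call (processDocument(FieldInfos)) or read it from SegmentWriteState. A minimal sketch of the pattern, using hypothetical stub names rather than the real Lucene classes:

    import java.io.IOException;

    // Hypothetical stand-ins, not the actual Lucene types.
    class FieldInfosStub { /* name <-> number mapping, elided */ }

    abstract class DocConsumerSketch {
      // Before: the chain held a FieldInfos assigned once via setFieldInfos().
      // After: the caller supplies the current per-segment instance on each
      // invocation, so a flush can start a fresh FieldInfos without
      // re-wiring the whole consumer chain.
      abstract void processDocument(FieldInfosStub fieldInfos) throws IOException;
    }
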
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Mon May 9 13:19:28 2011
@@ -34,16 +34,13 @@ import java.util.HashMap;
final class DocFieldProcessor extends DocConsumer {
final DocumentsWriter docWriter;
- final FieldInfos fieldInfos;
final DocFieldConsumer consumer;
final StoredFieldsWriter fieldsWriter;
public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) {
this.docWriter = docWriter;
this.consumer = consumer;
- fieldInfos = docWriter.getFieldInfos();
- consumer.setFieldInfos(fieldInfos);
- fieldsWriter = new StoredFieldsWriter(docWriter, fieldInfos);
+ fieldsWriter = new StoredFieldsWriter(docWriter);
}
@Override
@@ -53,7 +50,6 @@ final class DocFieldProcessor extends Do
for ( DocConsumerPerThread thread : threads) {
DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread;
childThreadsAndFields.put(perThread.consumer, perThread.fields());
- perThread.trimFields(state);
}
fieldsWriter.flush(state);
consumer.flush(childThreadsAndFields, state);
@@ -63,7 +59,7 @@ final class DocFieldProcessor extends Do
// FreqProxTermsWriter does this with
// FieldInfo.storePayload.
final String fileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELD_INFOS_EXTENSION);
- fieldInfos.write(state.directory, fileName);
+ state.fieldInfos.write(state.directory, fileName);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java Mon May 9 13:19:28 2011
@@ -41,14 +41,13 @@ final class DocFieldProcessorPerThread e
float docBoost;
int fieldGen;
final DocFieldProcessor docFieldProcessor;
- final FieldInfos fieldInfos;
final DocFieldConsumerPerThread consumer;
// Holds all fields seen in current doc
DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
int fieldCount;
- // Hash table for all fields ever seen
+ // Hash table for all fields seen in current segment
DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
int hashMask = 1;
int totalFieldCount;
@@ -60,7 +59,6 @@ final class DocFieldProcessorPerThread e
public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException {
this.docState = threadState.docState;
this.docFieldProcessor = docFieldProcessor;
- this.fieldInfos = docFieldProcessor.fieldInfos;
this.consumer = docFieldProcessor.consumer.addThread(this);
fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState);
}
@@ -75,6 +73,7 @@ final class DocFieldProcessorPerThread e
field = next;
}
}
+ doAfterFlush();
fieldsWriter.abort();
consumer.abort();
}
@@ -92,45 +91,15 @@ final class DocFieldProcessorPerThread e
return fields;
}
- /** If there are fields we've seen but did not see again
- * in the last run, then free them up. */
-
- void trimFields(SegmentWriteState state) {
-
- for(int i=0;i<fieldHash.length;i++) {
- DocFieldProcessorPerField perField = fieldHash[i];
- DocFieldProcessorPerField lastPerField = null;
-
- while (perField != null) {
-
- if (perField.lastGen == -1) {
-
- // This field was not seen since the previous
- // flush, so, free up its resources now
-
- // Unhash
- if (lastPerField == null)
- fieldHash[i] = perField.next;
- else
- lastPerField.next = perField.next;
-
- if (state.infoStream != null) {
- state.infoStream.println(" purge field=" + perField.fieldInfo.name);
+ /** In flush we reset the fieldHash to not maintain per-field state
+ * across segments */
+ @Override
+ void doAfterFlush() {
+ fieldHash = new DocFieldProcessorPerField[2];
+ hashMask = 1;
+ totalFieldCount = 0;
}
- totalFieldCount--;
-
- } else {
- // Reset
- perField.lastGen = -1;
- lastPerField = perField;
- }
-
- perField = perField.next;
- }
- }
- }
-
private void rehash() {
final int newHashSize = (fieldHash.length*2);
assert newHashSize > fieldHash.length;
@@ -155,7 +124,7 @@ final class DocFieldProcessorPerThread e
}
@Override
- public DocumentsWriter.DocWriter processDocument() throws IOException {
+ public DocumentsWriter.DocWriter processDocument(FieldInfos fieldInfos) throws IOException {
consumer.startDocument();
fieldsWriter.startDocument();
@@ -193,7 +162,7 @@ final class DocFieldProcessorPerThread e
// needs to be more "pluggable" such that if I want
// to have a new "thing" my Fields can do, I can
// easily add it
- FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
+ FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
fp = new DocFieldProcessorPerField(this, fi);
@@ -203,11 +172,11 @@ final class DocFieldProcessorPerThread e
if (totalFieldCount >= fieldHash.length/2)
rehash();
- } else
- fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
+ } else {
+ fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
-
+ }
if (thisFieldGen != fp.lastGen) {
// First time we're seeing this field for this doc
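
The removal of trimFields() above changes the lifecycle of the per-thread field hash: it used to survive across segments and be pruned incrementally, while the new doAfterFlush() simply discards it after every flush. A sketch of that reset, with the entry type simplified to Object (not the actual class):

    // No per-field state survives a segment; the hash is rebuilt from scratch.
    final class PerSegmentFieldHashSketch {
      Object[] fieldHash = new Object[2];
      int hashMask = 1;
      int totalFieldCount;

      // the analogue of doAfterFlush() in the diff above
      void reset() {
        fieldHash = new Object[2];
        hashMask = 1;
        totalFieldCount = 0;
      }
    }
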
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverter.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverter.java Mon May 9 13:19:28 2011
@@ -40,13 +40,6 @@ final class DocInverter extends DocField
}
@Override
- void setFieldInfos(FieldInfos fieldInfos) {
- super.setFieldInfos(fieldInfos);
- consumer.setFieldInfos(fieldInfos);
- endConsumer.setFieldInfos(fieldInfos);
- }
-
- @Override
void flush(Map<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>>();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Mon May 9 13:19:28 2011
@@ -186,7 +186,6 @@ final class DocumentsWriter {
/**
* RAMFile buffer for DocWriters.
*/
- @SuppressWarnings("serial")
class PerDocBuffer extends RAMFile {
/**
@@ -266,34 +265,26 @@ final class DocumentsWriter {
// How much RAM we can use before flushing. This is 0 if
// we are flushing by doc count instead.
- private long ramBufferSize = (long) (IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB*1024*1024);
- private long waitQueuePauseBytes = (long) (ramBufferSize*0.1);
- private long waitQueueResumeBytes = (long) (ramBufferSize*0.05);
-
- // If we've allocated 5% over our RAM budget, we then
- // free down to 95%
- private long freeLevel = (long) (IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB*1024*1024*0.95);
-
- // Flush @ this number of docs. If ramBufferSize is
- // non-zero we will flush by RAM usage instead.
- private int maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
+
+ private final IndexWriterConfig config;
private boolean closed;
- private final FieldInfos fieldInfos;
+ private FieldInfos fieldInfos;
private final BufferedDeletesStream bufferedDeletesStream;
private final IndexWriter.FlushControl flushControl;
- DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletesStream bufferedDeletesStream) throws IOException {
+ DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, IndexingChain indexingChain, FieldInfos fieldInfos,
+ BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory;
this.writer = writer;
- this.similarityProvider = writer.getConfig().getSimilarityProvider();
- this.maxThreadStates = maxThreadStates;
+ this.similarityProvider = config.getSimilarityProvider();
+ this.maxThreadStates = config.getMaxThreadStates();
this.fieldInfos = fieldInfos;
this.bufferedDeletesStream = bufferedDeletesStream;
flushControl = writer.flushControl;
-
- consumer = indexingChain.getChain(this);
+ consumer = config.getIndexingChain().getChain(this);
+ this.config = config;
}
// Buffer a specific docID for deletion. Currently only
@@ -350,10 +341,6 @@ final class DocumentsWriter {
return doFlush;
}
- public FieldInfos getFieldInfos() {
- return fieldInfos;
- }
-
/** If non-null, various details of indexing are printed
* here. */
synchronized void setInfoStream(PrintStream infoStream) {
@@ -363,45 +350,6 @@ final class DocumentsWriter {
}
}
- synchronized void setSimilarityProvider(SimilarityProvider similarity) {
- this.similarityProvider = similarity;
- for(int i=0;i<threadStates.length;i++) {
- threadStates[i].docState.similarityProvider = similarity;
- }
- }
-
- /** Set how much RAM we can use before flushing. */
- synchronized void setRAMBufferSizeMB(double mb) {
- if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
- ramBufferSize = IndexWriterConfig.DISABLE_AUTO_FLUSH;
- waitQueuePauseBytes = 4*1024*1024;
- waitQueueResumeBytes = 2*1024*1024;
- } else {
- ramBufferSize = (long) (mb*1024*1024);
- waitQueuePauseBytes = (long) (ramBufferSize*0.1);
- waitQueueResumeBytes = (long) (ramBufferSize*0.05);
- freeLevel = (long) (0.95 * ramBufferSize);
- }
- }
-
- synchronized double getRAMBufferSizeMB() {
- if (ramBufferSize == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
- return ramBufferSize;
- } else {
- return ramBufferSize/1024./1024.;
- }
- }
-
- /** Set max buffered docs, which means we will flush by
- * doc count instead of by RAM usage. */
- void setMaxBufferedDocs(int count) {
- maxBufferedDocs = count;
- }
-
- int getMaxBufferedDocs() {
- return maxBufferedDocs;
- }
-
/** Get current segment name we are writing. */
synchronized String getSegment() {
return segment;
@@ -482,9 +430,14 @@ final class DocumentsWriter {
private void doAfterFlush() throws IOException {
// All ThreadStates should be idle when we are called
assert allThreadsIdle();
+ for (DocumentsWriterThreadState threadState : threadStates) {
+ threadState.consumer.doAfterFlush();
+ }
+
threadBindings.clear();
waitQueue.reset();
segment = null;
+ fieldInfos = new FieldInfos(fieldInfos);
numDocs = 0;
nextDocID = 0;
bufferIsFull = false;
@@ -585,10 +538,10 @@ final class DocumentsWriter {
if (infoStream != null) {
message("flush postings as segment " + segment + " numDocs=" + numDocs);
}
-
+
final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos,
numDocs, writer.getConfig().getTermIndexInterval(),
- SegmentCodecs.build(fieldInfos, writer.codecs),
+ fieldInfos.buildSegmentCodecs(true),
pendingDeletes);
// Apply delete-by-docID now (delete-byDocID only
// happens when an exception is hit processing that
@@ -602,7 +555,7 @@ final class DocumentsWriter {
pendingDeletes.docIDs.clear();
}
- newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false);
+ newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos);
Collection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
for (DocumentsWriterThreadState threadState : threadStates) {
@@ -796,7 +749,7 @@ final class DocumentsWriter {
// work
final DocWriter perDoc;
try {
- perDoc = state.consumer.processDocument();
+ perDoc = state.consumer.processDocument(fieldInfos);
} finally {
docState.clear();
}
@@ -1024,6 +977,14 @@ final class DocumentsWriter {
deletesRAMUsed = bufferedDeletesStream.bytesUsed();
+ final long ramBufferSize;
+ final double mb = config.getRAMBufferSizeMB();
+ if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
+ ramBufferSize = IndexWriterConfig.DISABLE_AUTO_FLUSH;
+ } else {
+ ramBufferSize = (long) (mb*1024*1024);
+ }
+
synchronized(this) {
if (ramBufferSize == IndexWriterConfig.DISABLE_AUTO_FLUSH || bufferIsFull) {
return;
@@ -1052,6 +1013,8 @@ final class DocumentsWriter {
boolean any = true;
+ final long freeLevel = (long) (0.95 * ramBufferSize);
+
while(bytesUsed()+deletesRAMUsed > freeLevel) {
synchronized(this) {
@@ -1117,10 +1080,24 @@ final class DocumentsWriter {
}
synchronized boolean doResume() {
+ final double mb = config.getRAMBufferSizeMB();
+ final long waitQueueResumeBytes;
+ if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
+ waitQueueResumeBytes = 2*1024*1024;
+ } else {
+ waitQueueResumeBytes = (long) (mb*1024*1024*0.05);
+ }
return waitingBytes <= waitQueueResumeBytes;
}
synchronized boolean doPause() {
+ final double mb = config.getRAMBufferSizeMB();
+ final long waitQueuePauseBytes;
+ if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
+ waitQueuePauseBytes = 4*1024*1024;
+ } else {
+ waitQueuePauseBytes = (long) (mb*1024*1024*0.1);
+ }
return waitingBytes > waitQueuePauseBytes;
}
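
The doPause()/doResume() hunks above show the wider change in this file: flush thresholds are no longer cached in fields but re-derived from the live config on every check, so a changed RAM buffer size takes effect immediately. A sketch of the arithmetic, with the DISABLE_AUTO_FLUSH sentinel value assumed:

    final class FlushThresholdSketch {
      static final double DISABLE_AUTO_FLUSH = -1; // assumed sentinel value

      static long pauseBytes(double ramBufferSizeMB) {
        if (ramBufferSizeMB == DISABLE_AUTO_FLUSH) {
          return 4 * 1024 * 1024;                            // fixed 4 MB
        }
        return (long) (ramBufferSizeMB * 1024 * 1024 * 0.1); // 10% of budget
      }

      static long resumeBytes(double ramBufferSizeMB) {
        if (ramBufferSizeMB == DISABLE_AUTO_FLUSH) {
          return 2 * 1024 * 1024;                             // fixed 2 MB
        }
        return (long) (ramBufferSizeMB * 1024 * 1024 * 0.05); // 5% of budget
      }
    }
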
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInfo.java Mon May 9 13:19:28 2011
@@ -19,9 +19,12 @@ package org.apache.lucene.index;
/** @lucene.experimental */
public final class FieldInfo {
- public String name;
+ public static final int UNASSIGNED_CODEC_ID = -1;
+
+ public final String name;
+ public final int number;
+
public boolean isIndexed;
- public int number;
// true if term vector for this field should be stored
boolean storeTermVector;
@@ -32,7 +35,7 @@ public final class FieldInfo {
public boolean omitTermFreqAndPositions;
public boolean storePayloads; // whether this field stores payloads together with term positions
- int codecId = 0; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
+ private int codecId = UNASSIGNED_CODEC_ID; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
@@ -57,12 +60,24 @@ public final class FieldInfo {
}
}
+ void setCodecId(int codecId) {
+ assert this.codecId == UNASSIGNED_CODEC_ID : "CodecId can only be set once.";
+ this.codecId = codecId;
+ }
+
+ public int getCodecId() {
+ return codecId;
+ }
+
@Override
public Object clone() {
- return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
+ FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ clone.codecId = this.codecId;
+ return clone;
}
+ // should only be called by FieldInfos#addOrUpdate
void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
if (this.isIndexed != isIndexed) {
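
The codecId change above turns a mutable, default-0 field into a write-once value: UNASSIGNED_CODEC_ID (-1) marks a field whose codec has not been chosen yet, and an assert enforces single assignment. A simplified copy of the pattern:

    final class CodecIdSketch {
      static final int UNASSIGNED = -1;
      private int codecId = UNASSIGNED;

      void setCodecId(int id) {
        assert codecId == UNASSIGNED : "codec id can only be set once";
        codecId = id;
      }

      int getCodecId() {
        return codecId;
      }
    }
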
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInfos.java Mon May 9 13:19:28 2011
@@ -17,16 +17,26 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs
+import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
+import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.StringHelper;
-import java.io.IOException;
-import java.util.*;
-
/** Access to the Fieldable Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Fieldable Info file. Objects
* of this class are thread-safe for multiple readers, but only one thread can
@@ -34,8 +44,160 @@ import java.util.*;
* accessing this object.
* @lucene.experimental
*/
-public final class FieldInfos {
+public final class FieldInfos implements Iterable<FieldInfo> {
+ static final class FieldNumberBiMap {
+
+ final static String CODEC_NAME = "GLOBAL_FIELD_MAP";
+
+ // Initial format
+ private static final int VERSION_START = 0;
+
+ private static final int VERSION_CURRENT = VERSION_START;
+
+ private final Map<Integer,String> numberToName;
+ private final Map<String,Integer> nameToNumber;
+ private int lowestUnassignedFieldNumber = -1;
+ private long lastVersion = 0;
+ private long version = 0;
+
+ FieldNumberBiMap() {
+ this.nameToNumber = new HashMap<String, Integer>();
+ this.numberToName = new HashMap<Integer, String>();
+ }
+
+ /**
+ * Returns the global field number for the given field name. If the name
+ * does not exist yet, it is added with the given preferred field number
+ * if that number is still unassigned; otherwise the first unassigned
+ * field number is used.
+ */
+ synchronized int addOrGet(String fieldName, int preferredFieldNumber) {
+ Integer fieldNumber = nameToNumber.get(fieldName);
+ if (fieldNumber == null) {
+ final Integer preferredBoxed = Integer.valueOf(preferredFieldNumber);
+
+ if (preferredFieldNumber != -1 && !numberToName.containsKey(preferredBoxed)) {
+ // cool - we can use this number globally
+ fieldNumber = preferredBoxed;
+ } else {
+ // find a new FieldNumber
+ while (numberToName.containsKey(++lowestUnassignedFieldNumber)) {
+ // might not be up to date - let's do the work when needed
+ }
+ fieldNumber = lowestUnassignedFieldNumber;
+ }
+
+ version++;
+ numberToName.put(fieldNumber, fieldName);
+ nameToNumber.put(fieldName, fieldNumber);
+
+ }
+
+ return fieldNumber.intValue();
+ }
+
+ /**
+ * Sets the given field number and name if not yet set.
+ */
+ synchronized void setIfNotSet(int fieldNumber, String fieldName) {
+ final Integer boxedFieldNumber = Integer.valueOf(fieldNumber);
+ if (!numberToName.containsKey(boxedFieldNumber)
+ && !nameToNumber.containsKey(fieldName)) {
+ version++;
+ numberToName.put(boxedFieldNumber, fieldName);
+ nameToNumber.put(fieldName, boxedFieldNumber);
+ } else {
+ assert containsConsistent(boxedFieldNumber, fieldName);
+ }
+ }
+
+ /**
+ * Writes this {@link FieldNumberBiMap} to the given output and returns its
+ * version.
+ */
+ public synchronized long write(IndexOutput output) throws IOException{
+ Set<Entry<String, Integer>> entrySet = nameToNumber.entrySet();
+ CodecUtil.writeHeader(output, CODEC_NAME, VERSION_CURRENT);
+ output.writeVInt(entrySet.size());
+ for (Entry<String, Integer> entry : entrySet) {
+ output.writeVInt(entry.getValue().intValue());
+ output.writeString(entry.getKey());
+ }
+ return version;
+ }
+ /**
+ * Reads the {@link FieldNumberBiMap} from the given input and resets the
+ * version to 0.
+ */
+ public synchronized void read(IndexInput input) throws IOException{
+ CodecUtil.checkHeader(input, CODEC_NAME,
+ VERSION_START,
+ VERSION_CURRENT);
+ final int size = input.readVInt();
+ for (int i = 0; i < size; i++) {
+ final int num = input.readVInt();
+ final String name = input.readString();
+ setIfNotSet(num, name);
+ }
+ version = lastVersion = 0;
+ }
+
+ /**
+ * Returns a new {@link FieldInfos} instance with this as the global field
+ * map
+ *
+ * @return a new {@link FieldInfos} instance with this as the global field
+ * map
+ */
+ public FieldInfos newFieldInfos(SegmentCodecsBuilder segmentCodecsBuilder) {
+ return new FieldInfos(this, segmentCodecsBuilder);
+ }
+
+ /**
+ * Returns <code>true</code> iff the last committed version differs from the
+ * current version, otherwise <code>false</code>
+ *
+ * @return <code>true</code> iff the last committed version differs from the
+ * current version, otherwise <code>false</code>
+ */
+ public synchronized boolean isDirty() {
+ return lastVersion != version;
+ }
+
+ /**
+ * Commits the given version if it is greater than the previously committed version.
+ *
+ * @param version
+ * the version to commit
+ * @return <code>true</code> iff the version was successfully committed otherwise <code>false</code>
+ * @see #write(IndexOutput)
+ */
+ public synchronized boolean commitLastVersion(long version) {
+ if (version > lastVersion) {
+ lastVersion = version;
+ return true;
+ }
+ return false;
+ }
+
+ // just for testing
+ Set<Entry<String, Integer>> entries() {
+ return new HashSet<Entry<String, Integer>>(nameToNumber.entrySet());
+ }
+
+ // used by assert
+ synchronized boolean containsConsistent(Integer number, String name) {
+ return name.equals(numberToName.get(number))
+ && number.equals(nameToNumber.get(name));
+ }
+ }
+
+ private final SortedMap<Integer,FieldInfo> byNumber = new TreeMap<Integer,FieldInfo>();
+ private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
+ private final FieldNumberBiMap globalFieldNumbers;
+ private final SegmentCodecsBuilder segmentCodecsBuilder;
+
// First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2;
public static final int FORMAT_PER_FIELD_CODEC = -3;
@@ -52,22 +214,55 @@ public final class FieldInfos {
static final byte OMIT_NORMS = 0x10;
static final byte STORE_PAYLOADS = 0x20;
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
-
- private final ArrayList<FieldInfo> byNumber = new ArrayList<FieldInfo>();
- private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
+
private int format;
+ /**
+ * Creates a new {@link FieldInfos} instance with a private
+ * {@link org.apache.lucene.index.FieldInfos.FieldNumberBiMap} and a default {@link SegmentCodecsBuilder}
+ * initialized with {@link CodecProvider#getDefault()}.
+ * <p>
+ * Note: this ctor should not be used during indexing; use
+ * {@link FieldInfos#FieldInfos(FieldInfos)} or
+ * {@link FieldInfos#FieldInfos(FieldNumberBiMap,org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder)}
+ * instead.
+ */
public FieldInfos() {
+ this(new FieldNumberBiMap(), SegmentCodecsBuilder.create(CodecProvider.getDefault()));
+ }
+
+ /**
+ * Creates a new {@link FieldInfos} instance from the given instance. If the given instance is
+ * read-only this instance will be read-only too.
+ *
+ * @see #isReadOnly()
+ */
+ FieldInfos(FieldInfos other) {
+ this(other.globalFieldNumbers, other.segmentCodecsBuilder);
+ }
+
+ /**
+ * Creates a new FieldInfos instance with the given {@link FieldNumberBiMap}.
+ * If the {@link FieldNumberBiMap} is <code>null</code> this instance will be read-only.
+ * @see #isReadOnly()
+ */
+ FieldInfos(FieldNumberBiMap globalFieldNumbers, SegmentCodecsBuilder segmentCodecsBuilder) {
+ this.globalFieldNumbers = globalFieldNumbers;
+ this.segmentCodecsBuilder = segmentCodecsBuilder;
}
/**
* Construct a FieldInfos object using the directory and the name of the file
- * IndexInput
+ * IndexInput.
+ * <p>
+ * Note: The created instance will be read-only
+ *
* @param d The directory to open the IndexInput from
* @param name The name of the file to open the IndexInput from in the Directory
* @throws IOException
*/
public FieldInfos(Directory d, String name) throws IOException {
+ this((FieldNumberBiMap)null, null); // use null here to make this FIs Read-Only
IndexInput input = d.openInput(name);
try {
read(input, name);
@@ -75,36 +270,45 @@ public final class FieldInfos {
input.close();
}
}
+
+ /**
+ * Adds the given field to this FieldInfos name / number mapping. The given FI
+ * must be present in the global field number mapping before this method is
+ * called
+ */
+ private void putInternal(FieldInfo fi) {
+ assert !byNumber.containsKey(fi.number);
+ assert !byName.containsKey(fi.name);
+ assert globalFieldNumbers == null || globalFieldNumbers.containsConsistent(Integer.valueOf(fi.number), fi.name);
+ byNumber.put(fi.number, fi);
+ byName.put(fi.name, fi);
+ }
+
+ private int nextFieldNumber(String name, int preferredFieldNumber) {
+ // get a global number for this field
+ final int fieldNumber = globalFieldNumbers.addOrGet(name,
+ preferredFieldNumber);
+ assert byNumber.get(fieldNumber) == null : "field number " + fieldNumber
+ + " already taken";
+ return fieldNumber;
+ }
/**
* Returns a deep clone of this FieldInfos instance.
*/
@Override
synchronized public Object clone() {
- FieldInfos fis = new FieldInfos();
- final int numField = byNumber.size();
- for(int i=0;i<numField;i++) {
- FieldInfo fi = (FieldInfo) ( byNumber.get(i)).clone();
- fis.byNumber.add(fi);
- fis.byName.put(fi.name, fi);
+ FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
+ for (FieldInfo fi : this) {
+ FieldInfo clone = (FieldInfo) (fi).clone();
+ fis.putInternal(clone);
}
return fis;
}
- /** Adds field info for a Document. */
- synchronized public void add(Document doc) {
- List<Fieldable> fields = doc.getFields();
- for (Fieldable field : fields) {
- add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
- field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
- }
- }
-
/** Returns true if any fields do not omitTermFreqAndPositions */
public boolean hasProx() {
- final int numFields = byNumber.size();
- for(int i=0;i<numFields;i++) {
- final FieldInfo fi = fieldInfo(i);
+ for (FieldInfo fi : this) {
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
return true;
}
@@ -113,17 +317,17 @@ public final class FieldInfos {
}
/**
- * Add fields that are indexed. Whether they have termvectors has to be specified.
+ * Adds or updates fields that are indexed. Whether they have termvectors has to be specified.
*
* @param names The names of the fields
* @param storeTermVectors Whether the fields store term vectors or not
* @param storePositionWithTermVector true if positions should be stored.
* @param storeOffsetWithTermVector true if offsets should be stored
*/
- synchronized public void addIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector,
+ synchronized public void addOrUpdateIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector) {
for (String name : names) {
- add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
+ addOrUpdate(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
}
}
@@ -133,11 +337,11 @@ public final class FieldInfos {
* @param names The names of the fields
* @param isIndexed Whether the fields are indexed or not
*
- * @see #add(String, boolean)
+ * @see #addOrUpdate(String, boolean)
*/
- synchronized public void add(Collection<String> names, boolean isIndexed) {
+ synchronized public void addOrUpdate(Collection<String> names, boolean isIndexed) {
for (String name : names) {
- add(name, isIndexed);
+ addOrUpdate(name, isIndexed);
}
}
@@ -146,10 +350,10 @@ public final class FieldInfos {
*
* @param name The name of the Fieldable
* @param isIndexed true if the field is indexed
- * @see #add(String, boolean, boolean, boolean, boolean)
+ * @see #addOrUpdate(String, boolean, boolean, boolean, boolean)
*/
- synchronized public void add(String name, boolean isIndexed) {
- add(name, isIndexed, false, false, false, false);
+ synchronized public void addOrUpdate(String name, boolean isIndexed) {
+ addOrUpdate(name, isIndexed, false, false, false, false);
}
/**
@@ -159,8 +363,8 @@ public final class FieldInfos {
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
*/
- synchronized public void add(String name, boolean isIndexed, boolean storeTermVector){
- add(name, isIndexed, storeTermVector, false, false, false);
+ synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector){
+ addOrUpdate(name, isIndexed, storeTermVector, false, false, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
@@ -174,10 +378,10 @@ public final class FieldInfos {
* @param storePositionWithTermVector true if the term vector with positions should be stored
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
*/
- synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
+ synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
- add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+ addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
@@ -192,9 +396,9 @@ public final class FieldInfos {
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted
*/
- synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
+ synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
- add(name, isIndexed, storeTermVector, storePositionWithTermVector,
+ addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, false, false);
}
@@ -212,33 +416,56 @@ public final class FieldInfos {
* @param storePayloads true if payloads should be stored for this field
* @param omitTermFreqAndPositions true if term freqs should be omitted for this field
*/
- synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
+ synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+ return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
+ storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ }
+
+ synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
+ boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
+ boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+ if (globalFieldNumbers == null) {
+ throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
+ }
+ assert segmentCodecsBuilder != null : "SegmentCodecsBuilder is set to null but FieldInfos is not read-only";
FieldInfo fi = fieldInfo(name);
if (fi == null) {
- return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
+ fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
} else {
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
}
+ if (fi.isIndexed && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
+ segmentCodecsBuilder.tryAddAndSet(fi);
+ }
return fi;
}
synchronized public FieldInfo add(FieldInfo fi) {
- return add(fi.name, fi.isIndexed, fi.storeTermVector,
+ // IMPORTANT - reuse the field number if possible for consistent field numbers across segments
+ return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
fi.omitNorms, fi.storePayloads,
fi.omitTermFreqAndPositions);
}
-
- private FieldInfo addInternal(String name, boolean isIndexed,
+
+ /*
+ * NOTE: if you call this method from a public method, make sure to check whether this instance is modifiable and throw an exception otherwise
+ */
+ private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+ // don't check modifiable here since we use that to initially build up FIs
name = StringHelper.intern(name);
- FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
+ if (globalFieldNumbers != null) {
+ globalFieldNumbers.setIfNotSet(fieldNumber, name);
+ }
+ final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
- byNumber.add(fi);
- byName.put(name, fi);
+
+ putInternal(fi);
return fi;
}
@@ -248,7 +475,7 @@ public final class FieldInfos {
}
public FieldInfo fieldInfo(String fieldName) {
- return byName.get(fieldName);
+ return byName.get(fieldName);
}
/**
@@ -259,8 +486,8 @@ public final class FieldInfos {
* with the given number doesn't exist.
*/
public String fieldName(int fieldNumber) {
- FieldInfo fi = fieldInfo(fieldNumber);
- return (fi != null) ? fi.name : "";
+ FieldInfo fi = fieldInfo(fieldNumber);
+ return (fi != null) ? fi.name : "";
}
/**
@@ -273,13 +500,18 @@ public final class FieldInfos {
return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
}
+ public Iterator<FieldInfo> iterator() {
+ return byNumber.values().iterator();
+ }
+
public int size() {
+ assert byNumber.size() == byName.size();
return byNumber.size();
}
public boolean hasVectors() {
- for (int i = 0; i < size(); i++) {
- if (fieldInfo(i).storeTermVector) {
+ for (FieldInfo fi : this) {
+ if (fi.storeTermVector) {
return true;
}
}
@@ -287,13 +519,29 @@ public final class FieldInfos {
}
public boolean hasNorms() {
- for (int i = 0; i < size(); i++) {
- if (!fieldInfo(i).omitNorms) {
+ for (FieldInfo fi : this) {
+ if (!fi.omitNorms) {
return true;
}
}
return false;
}
+
+ /**
+ * Builds the {@link SegmentCodecs} mapping for this {@link FieldInfos} instance.
+ * @param clearBuilder <code>true</code> iff the internal {@link SegmentCodecsBuilder} must be cleared, otherwise <code>false</code>
+ */
+ public SegmentCodecs buildSegmentCodecs(boolean clearBuilder) {
+ if (globalFieldNumbers == null) {
+ throw new IllegalStateException("FieldInfos are read-only no SegmentCodecs available");
+ }
+ assert segmentCodecsBuilder != null;
+ final SegmentCodecs segmentCodecs = segmentCodecsBuilder.build();
+ if (clearBuilder) {
+ segmentCodecsBuilder.clear();
+ }
+ return segmentCodecs;
+ }
public void write(Directory d, String name) throws IOException {
IndexOutput output = d.createOutput(name);
@@ -303,12 +551,22 @@ public final class FieldInfos {
output.close();
}
}
+
+ /**
+ * Returns <code>true</code> iff this instance is not backed by a
+ * {@link org.apache.lucene.index.FieldInfos.FieldNumberBiMap}, otherwise
+ * <code>false</code>. Instances read from a directory via
+ * {@link FieldInfos#FieldInfos(Directory, String)} will always be read-only
+ * since no {@link org.apache.lucene.index.FieldInfos.FieldNumberBiMap} is supplied.
+ */
+ public final boolean isReadOnly() {
+ return globalFieldNumbers == null;
+ }
public void write(IndexOutput output) throws IOException {
output.writeVInt(FORMAT_CURRENT);
output.writeVInt(size());
- for (int i = 0; i < size(); i++) {
- FieldInfo fi = fieldInfo(i);
+ for (FieldInfo fi : this) {
byte bits = 0x0;
if (fi.isIndexed) bits |= IS_INDEXED;
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
@@ -318,7 +576,8 @@ public final class FieldInfos {
if (fi.storePayloads) bits |= STORE_PAYLOADS;
if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
output.writeString(fi.name);
- output.writeInt(fi.codecId);
+ output.writeInt(fi.number);
+ output.writeInt(fi.getCodecId());
output.writeByte(bits);
}
}
@@ -338,6 +597,7 @@ public final class FieldInfos {
for (int i = 0; i < size; i++) {
String name = StringHelper.intern(input.readString());
// if this is a previous format codec 0 will be preflex!
+ final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC? input.readInt():i;
final int codecId = format <= FORMAT_PER_FIELD_CODEC? input.readInt():0;
byte bits = input.readByte();
boolean isIndexed = (bits & IS_INDEXED) != 0;
@@ -347,8 +607,8 @@ public final class FieldInfos {
boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
- final FieldInfo addInternal = addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
- addInternal.codecId = codecId;
+ final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ addInternal.setCodecId(codecId);
}
if (input.getFilePointer() != input.length()) {
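
The heart of this file's change is the new FieldNumberBiMap: a bidirectional name <-> number map that honors a preferred number (the number a field already carries in an existing segment) when it is still free. A self-contained sketch of the addOrGet logic added above:

    import java.util.HashMap;
    import java.util.Map;

    final class FieldNumberBiMapSketch {
      private final Map<Integer, String> numberToName = new HashMap<Integer, String>();
      private final Map<String, Integer> nameToNumber = new HashMap<String, Integer>();
      private int lowestUnassigned = -1;

      synchronized int addOrGet(String name, int preferred) {
        Integer number = nameToNumber.get(name);
        if (number == null) {
          if (preferred != -1 && !numberToName.containsKey(preferred)) {
            number = preferred;            // reuse the number across segments
          } else {
            while (numberToName.containsKey(++lowestUnassigned)) {
              // skip numbers that are already taken
            }
            number = lowestUnassigned;
          }
          numberToName.put(number, name);
          nameToNumber.put(name, number);
        }
        return number.intValue();
      }
    }

Sharing one such map across segments is what keeps a field's number stable index-wide, even when different segments first saw their fields in different orders.
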
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldsWriter.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldsWriter.java Mon May 9 13:19:28 2011
@@ -45,14 +45,12 @@ final class FieldsWriter {
// If null - we were supplied with streams, if notnull - we manage them ourselves
private Directory directory;
private String segment;
- private FieldInfos fieldInfos;
private IndexOutput fieldsStream;
private IndexOutput indexStream;
- FieldsWriter(Directory directory, String segment, FieldInfos fn) throws IOException {
+ FieldsWriter(Directory directory, String segment) throws IOException {
this.directory = directory;
this.segment = segment;
- fieldInfos = fn;
boolean success = false;
try {
@@ -70,10 +68,9 @@ final class FieldsWriter {
}
}
- FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
+ FieldsWriter(IndexOutput fdx, IndexOutput fdt) {
directory = null;
segment = null;
- fieldInfos = fn;
fieldsStream = fdt;
indexStream = fdx;
}
@@ -166,7 +163,7 @@ final class FieldsWriter {
assert fieldsStream.getFilePointer() == position;
}
- final void addDocument(Document doc) throws IOException {
+ final void addDocument(Document doc, FieldInfos fieldInfos) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
int storedCount = 0;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexFileNames.java Mon May 9 13:19:28 2011
@@ -78,6 +78,10 @@ public final class IndexFileNames {
/** Extension of separate norms */
public static final String SEPARATE_NORMS_EXTENSION = "s";
+
+ /** Extension of global field numbers */
+ public static final String GLOBAL_FIELD_NUM_MAP_EXTENSION = "fnx";
+
/**
* This array contains all filename extensions used by
@@ -98,6 +102,7 @@ public final class IndexFileNames {
GEN_EXTENSION,
NORMS_EXTENSION,
COMPOUND_FILE_STORE_EXTENSION,
+ GLOBAL_FIELD_NUM_MAP_EXTENSION,
};
public static final String[] STORE_INDEX_EXTENSIONS = new String[] {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java Mon May 9 13:19:28 2011
@@ -35,10 +35,11 @@ import java.util.concurrent.ConcurrentHa
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
+import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
@@ -221,6 +222,7 @@ public class IndexWriter implements Clos
volatile long pendingCommitChangeCount;
final SegmentInfos segmentInfos; // the segments
+ final FieldNumberBiMap globalFieldNumberMap;
private DocumentsWriter docWriter;
private IndexFileDeleter deleter;
@@ -230,8 +232,6 @@ public class IndexWriter implements Clos
private Lock writeLock;
- private final int termIndexInterval;
-
private boolean closed;
private boolean closing;
@@ -689,10 +689,9 @@ public class IndexWriter implements Clos
/**
* Constructs a new IndexWriter per the settings given in <code>conf</code>.
- * Note that the passed in {@link IndexWriterConfig} is cloned and thus making
- * changes to it after IndexWriter has been instantiated will not affect
- * IndexWriter. Additionally, calling {@link #getConfig()} and changing the
- * parameters does not affect that IndexWriter instance.
+ * Note that the passed in {@link IndexWriterConfig} is
+ * privately cloned; if you need to make subsequent "live"
+ * changes to the configuration use {@link #getConfig}.
* <p>
*
* @param d
@@ -718,11 +717,9 @@ public class IndexWriter implements Clos
directory = d;
analyzer = conf.getAnalyzer();
infoStream = defaultInfoStream;
- termIndexInterval = conf.getTermIndexInterval();
mergePolicy = conf.getMergePolicy();
mergePolicy.setIndexWriter(this);
mergeScheduler = conf.getMergeScheduler();
- mergedSegmentWarmer = conf.getMergedSegmentWarmer();
codecs = conf.getCodecProvider();
bufferedDeletesStream = new BufferedDeletesStream(messageID);
@@ -791,7 +788,10 @@ public class IndexWriter implements Clos
setRollbackSegmentInfos(segmentInfos);
- docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletesStream);
+ // start with previous field numbers, but new FieldInfos
+ globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
+ docWriter = new DocumentsWriter(config, directory, this, conf.getIndexingChain(),
+ globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)), bufferedDeletesStream);
docWriter.setInfoStream(infoStream);
// Default deleter (for backwards compatibility) is
@@ -809,10 +809,6 @@ public class IndexWriter implements Clos
segmentInfos.changed();
}
- docWriter.setRAMBufferSizeMB(conf.getRAMBufferSizeMB());
- docWriter.setMaxBufferedDocs(conf.getMaxBufferedDocs());
- pushMaxBufferedDocs();
-
if (infoStream != null) {
message("init: create=" + create);
messageState();
@@ -834,85 +830,26 @@ public class IndexWriter implements Clos
}
}
}
-
- private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
- Directory cfsDir = null;
- try {
- if (info.getUseCompoundFile()) {
- cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
- } else {
- cfsDir = directory;
- }
- return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, "", IndexFileNames.FIELD_INFOS_EXTENSION));
- } finally {
- if (info.getUseCompoundFile() && cfsDir != null) {
- cfsDir.close();
- }
- }
- }
-
- private FieldInfos getCurrentFieldInfos() throws IOException {
- final FieldInfos fieldInfos;
- if (segmentInfos.size() > 0) {
- if (segmentInfos.getFormat() > DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
- // Pre-4.0 index. In this case we sweep all
- // segments, merging their FieldInfos:
- fieldInfos = new FieldInfos();
- for(SegmentInfo info : segmentInfos) {
- final FieldInfos segFieldInfos = getFieldInfos(info);
- final int fieldCount = segFieldInfos.size();
- for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
- fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
- }
- }
- } else {
- // Already a 4.0 index; just seed the FieldInfos
- // from the last segment
- fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
- }
- } else {
- fieldInfos = new FieldInfos();
- }
- return fieldInfos;
- }
-
+
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
}
/**
- * Returns the {@link IndexWriterConfig} that was passed to
- * {@link #IndexWriter(Directory, IndexWriterConfig)}. This allows querying
- * IndexWriter's settings.
+ * Returns the private {@link IndexWriterConfig}, cloned
+ * from the {@link IndexWriterConfig} passed to
+ * {@link #IndexWriter(Directory, IndexWriterConfig)}.
* <p>
- * <b>NOTE:</b> setting any parameter on the returned instance has not effect
- * on the IndexWriter instance. If you need to change those settings after
- * IndexWriter has been created, you need to instantiate a new IndexWriter.
+ * <b>NOTE:</b> some settings may be changed on the
+ * returned {@link IndexWriterConfig}, and will take
+ * effect in the current IndexWriter instance. See the
+ * javadocs for the specific setters in {@link
+ * IndexWriterConfig} for details.
*/
public IndexWriterConfig getConfig() {
return config;
}
- /**
- * If we are flushing by doc count (not by RAM usage), and
- * using LogDocMergePolicy then push maxBufferedDocs down
- * as its minMergeDocs, to keep backwards compatibility.
- */
- private void pushMaxBufferedDocs() {
- if (docWriter.getMaxBufferedDocs() != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
- final MergePolicy mp = mergePolicy;
- if (mp instanceof LogDocMergePolicy) {
- LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
- final int maxBufferedDocs = docWriter.getMaxBufferedDocs();
- if (lmp.getMinMergeDocs() != maxBufferedDocs) {
- if (infoStream != null)
- message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
- lmp.setMinMergeDocs(maxBufferedDocs);
- }
- }
- }
- }
-
/** If non-null, this will be the default infoStream used
* by a newly instantiated IndexWriter.
* @see #setInfoStream
@@ -1441,6 +1378,11 @@ public class IndexWriter implements Clos
}
// for test purpose
+ final synchronized Collection<String> getIndexFileNames() throws IOException {
+ return segmentInfos.files(directory, true);
+ }
+
+ // for test purpose
final synchronized int getDocCount(int i) {
if (i >= 0 && i < segmentInfos.size()) {
return segmentInfos.info(i).docCount;
@@ -1476,8 +1418,8 @@ public class IndexWriter implements Clos
/** If non-null, information about merges will be printed to this.
*/
- private PrintStream infoStream = null;
- private static PrintStream defaultInfoStream = null;
+ private PrintStream infoStream;
+ private static PrintStream defaultInfoStream;
/**
* Requests an "optimize" operation on an index, priming the index
@@ -2181,7 +2123,7 @@ public class IndexWriter implements Clos
docCount += info.docCount;
String newSegName = newSegmentName();
String dsName = info.getDocStoreSegment();
-
+
if (infoStream != null) {
message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
}
@@ -2269,19 +2211,24 @@ public class IndexWriter implements Clos
ensureOpen();
try {
+ if (infoStream != null)
+ message("flush at addIndexes(IndexReader...)");
+ flush(false, true);
+
String mergedName = newSegmentName();
- SegmentMerger merger = new SegmentMerger(directory, termIndexInterval,
+ SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
mergedName, null, codecs, payloadProcessorProvider,
- ((FieldInfos) docWriter.getFieldInfos().clone()));
+ globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
for (IndexReader reader : readers) // add new indexes
merger.add(reader);
int docCount = merger.merge(); // merge 'em
-
+ final FieldInfos fieldInfos = merger.fieldInfos();
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
- false, merger.fieldInfos().hasProx(), merger.getSegmentCodecs(),
- merger.fieldInfos().hasVectors());
+ false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
+ fieldInfos.hasVectors(),
+ fieldInfos);
setDiagnostics(info, "addIndexes(IndexReader...)");
boolean useCompoundFile;
@@ -3014,7 +2961,7 @@ public class IndexWriter implements Clos
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
- merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
+ merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
// Lock order: IW -> BD
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
@@ -3163,9 +3110,9 @@ public class IndexWriter implements Clos
SegmentInfos sourceSegments = merge.segments;
- SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge,
+ SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
codecs, payloadProcessorProvider,
- ((FieldInfos) docWriter.getFieldInfos().clone()));
+ merge.info.getFieldInfos());
if (infoStream != null) {
message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
@@ -3174,8 +3121,6 @@ public class IndexWriter implements Clos
merge.readers = new ArrayList<SegmentReader>();
merge.readerClones = new ArrayList<SegmentReader>();
- merge.info.setHasVectors(merger.fieldInfos().hasVectors());
-
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
@@ -3217,6 +3162,8 @@ public class IndexWriter implements Clos
// Record which codec was used to write the segment
merge.info.setSegmentCodecs(merger.getSegmentCodecs());
+ // Record if we have merged vectors
+ merge.info.setHasVectors(merger.fieldInfos().hasVectors());
if (infoStream != null) {
message("merge segmentCodecs=" + merger.getSegmentCodecs());
@@ -3292,6 +3239,8 @@ public class IndexWriter implements Clos
merge.info.setUseCompoundFile(true);
}
+ final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
+
final int termsIndexDivisor;
final boolean loadDocStores;
@@ -3572,8 +3521,6 @@ public class IndexWriter implements Clos
public abstract void warm(IndexReader reader) throws IOException;
}
- private IndexReaderWarmer mergedSegmentWarmer;
-
private void handleOOM(OutOfMemoryError oom, String location) {
if (infoStream != null) {
message("hit OutOfMemoryError inside " + location);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Mon May 9 13:19:28 2011
@@ -26,12 +26,16 @@ import org.apache.lucene.search.Similari
import org.apache.lucene.util.Version;
/**
- * Holds all the configuration of {@link IndexWriter}. This object is only used
- * while constructing a new IndexWriter. Those settings cannot be changed
- * afterwards, except instantiating a new IndexWriter.
+ * Holds all the configuration of {@link IndexWriter}. You
+ * should instantiate this class, call the setters to set
+ * your configuration, then pass it to {@link IndexWriter}.
+ * Note that {@link IndexWriter} makes a private clone; if
+ * you need to subsequently change settings use {@link
+ * IndexWriter#getConfig}.
+ *
* <p>
* All setter methods return {@link IndexWriterConfig} to allow chaining
- * settings conveniently. Thus someone can do:
+ * settings conveniently, for example:
*
* <pre>
* IndexWriterConfig conf = new IndexWriterConfig(analyzer);
@@ -108,26 +112,25 @@ public final class IndexWriterConfig imp
return WRITE_LOCK_TIMEOUT;
}
- private Analyzer analyzer;
- private IndexDeletionPolicy delPolicy;
- private IndexCommit commit;
- private OpenMode openMode;
- private SimilarityProvider similarityProvider;
- private int termIndexInterval; // TODO: this should be private to the codec, not settable here
- private MergeScheduler mergeScheduler;
- private long writeLockTimeout;
- private int maxBufferedDeleteTerms;
- private double ramBufferSizeMB;
- private int maxBufferedDocs;
- private IndexingChain indexingChain;
- private IndexReaderWarmer mergedSegmentWarmer;
- private CodecProvider codecProvider;
- private MergePolicy mergePolicy;
- private int maxThreadStates;
- private boolean readerPooling;
- private int readerTermsIndexDivisor;
+ private final Analyzer analyzer;
+ private volatile IndexDeletionPolicy delPolicy;
+ private volatile IndexCommit commit;
+ private volatile OpenMode openMode;
+ private volatile SimilarityProvider similarityProvider;
+ private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here
+ private volatile MergeScheduler mergeScheduler;
+ private volatile long writeLockTimeout;
+ private volatile int maxBufferedDeleteTerms;
+ private volatile double ramBufferSizeMB;
+ private volatile int maxBufferedDocs;
+ private volatile IndexingChain indexingChain;
+ private volatile IndexReaderWarmer mergedSegmentWarmer;
+ private volatile CodecProvider codecProvider;
+ private volatile MergePolicy mergePolicy;
+ private volatile int maxThreadStates;
+ private volatile boolean readerPooling;
+ private volatile int readerTermsIndexDivisor;
- // required for clone
private Version matchVersion;
/**
@@ -162,7 +165,7 @@ public final class IndexWriterConfig imp
@Override
public Object clone() {
// Shallow clone is the only thing that's possible, since parameters like
- // analyzer, index commit etc. do not implemnt Cloneable.
+ // analyzer, index commit etc. do not implement Cloneable.
try {
return super.clone();
} catch (CloneNotSupportedException e) {
@@ -176,7 +179,9 @@ public final class IndexWriterConfig imp
return analyzer;
}
- /** Specifies {@link OpenMode} of that index. */
+ /** Specifies {@link OpenMode} of the index.
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
public IndexWriterConfig setOpenMode(OpenMode openMode) {
this.openMode = openMode;
return this;
@@ -201,6 +206,8 @@ public final class IndexWriterConfig imp
* <p>
* <b>NOTE:</b> the deletion policy cannot be null. If <code>null</code> is
* passed, the deletion policy will be set to the default.
+ *
+ * <p>Only takes effect when IndexWriter is first created.
*/
public IndexWriterConfig setIndexDeletionPolicy(IndexDeletionPolicy delPolicy) {
this.delPolicy = delPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : delPolicy;
@@ -219,7 +226,8 @@ public final class IndexWriterConfig imp
/**
* Expert: allows to open a certain commit point. The default is null which
* opens the latest commit point.
- */
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
public IndexWriterConfig setIndexCommit(IndexCommit commit) {
this.commit = commit;
return this;
@@ -239,7 +247,8 @@ public final class IndexWriterConfig imp
* <p>
* <b>NOTE:</b> the similarity provider cannot be null. If <code>null</code> is passed,
* the similarity provider will be set to the default implementation (unspecified).
- */
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
public IndexWriterConfig setSimilarityProvider(SimilarityProvider similarityProvider) {
this.similarityProvider = similarityProvider == null ? IndexSearcher.getDefaultSimilarityProvider() : similarityProvider;
return this;
@@ -274,7 +283,9 @@ public final class IndexWriterConfig imp
* must be scanned for each random term access.
*
* @see #DEFAULT_TERM_INDEX_INTERVAL
- */
+ *
+ * <p>Takes effect immediately, but only applies to newly
+ * flushed/merged segments. */
public IndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here
this.termIndexInterval = interval;
return this;
@@ -295,7 +306,8 @@ public final class IndexWriterConfig imp
* <p>
* <b>NOTE:</b> the merge scheduler cannot be null. If <code>null</code> is
* passed, the merge scheduler will be set to the default.
- */
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
public IndexWriterConfig setMergeScheduler(MergeScheduler mergeScheduler) {
this.mergeScheduler = mergeScheduler == null ? new ConcurrentMergeScheduler() : mergeScheduler;
return this;
@@ -313,7 +325,8 @@ public final class IndexWriterConfig imp
* Sets the maximum time to wait for a write lock (in milliseconds) for this
* instance. You can change the default value for all instances by calling
* {@link #setDefaultWriteLockTimeout(long)}.
- */
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
public IndexWriterConfig setWriteLockTimeout(long writeLockTimeout) {
this.writeLockTimeout = writeLockTimeout;
return this;
@@ -339,6 +352,9 @@ public final class IndexWriterConfig imp
* @throws IllegalArgumentException if maxBufferedDeleteTerms
* is enabled but smaller than 1
* @see #setRAMBufferSizeMB
+ *
+ * <p>Takes effect immediately, but only the next time a
+ * document is added, updated or deleted.
*/
public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
@@ -391,6 +407,9 @@ public final class IndexWriterConfig imp
* <p>
* The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
*
+ * <p>Takes effect immediately, but only the next time a
+ * document is added, updated or deleted.
+ *
* @throws IllegalArgumentException
* if ramBufferSize is enabled but non-positive, or it disables
* ramBufferSize when maxBufferedDocs is already disabled
@@ -430,6 +449,9 @@ public final class IndexWriterConfig imp
* <p>
* Disabled by default (writer flushes by RAM usage).
*
+ * <p>Takes effect immediately, but only the next time a
+ * document is added, updated or deleted.
+ *
* @see #setRAMBufferSizeMB(double)
*
* @throws IllegalArgumentException
@@ -458,7 +480,9 @@ public final class IndexWriterConfig imp
return maxBufferedDocs;
}
- /** Set the merged segment warmer. See {@link IndexReaderWarmer}. */
+ /** Set the merged segment warmer. See {@link IndexReaderWarmer}.
+ *
+ * <p>Takes effect on the next merge. */
public IndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) {
this.mergedSegmentWarmer = mergeSegmentWarmer;
return this;
@@ -475,13 +499,16 @@ public final class IndexWriterConfig imp
* and return a {@link MergePolicy.MergeSpecification} describing the merges.
* It also selects merges to do for optimize(). (The default is
* {@link LogByteSizeMergePolicy}.
- */
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy) {
this.mergePolicy = mergePolicy == null ? new LogByteSizeMergePolicy() : mergePolicy;
return this;
}
- /** Set the CodecProvider. See {@link CodecProvider}. */
+ /** Set the CodecProvider. See {@link CodecProvider}.
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
public IndexWriterConfig setCodecProvider(CodecProvider codecProvider) {
this.codecProvider = codecProvider;
return this;
@@ -507,7 +534,8 @@ public final class IndexWriterConfig imp
* at once in IndexWriter. Values < 1 are invalid and if passed
* <code>maxThreadStates</code> will be set to
* {@link #DEFAULT_MAX_THREAD_STATES}.
- */
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
public IndexWriterConfig setMaxThreadStates(int maxThreadStates) {
this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates;
return this;
@@ -526,7 +554,9 @@ public final class IndexWriterConfig imp
* This method lets you enable pooling without getting a
* near-real-time reader. NOTE: if you set this to
* false, IndexWriter will still pool readers once
- * {@link IndexWriter#getReader} is called. */
+ * {@link IndexWriter#getReader} is called.
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
public IndexWriterConfig setReaderPooling(boolean readerPooling) {
this.readerPooling = readerPooling;
return this;
@@ -538,7 +568,9 @@ public final class IndexWriterConfig imp
return readerPooling;
}
- /** Expert: sets the {@link DocConsumer} chain to be used to process documents. */
+ /** Expert: sets the {@link DocConsumer} chain to be used to process documents.
+ *
+ * <p>Only takes effect when IndexWriter is first created. */
IndexWriterConfig setIndexingChain(IndexingChain indexingChain) {
this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain;
return this;
@@ -555,7 +587,10 @@ public final class IndexWriterConfig imp
* IndexWriter#getReader}. If you pass -1, the terms index
* won't be loaded by the readers. This is only useful in
* advanced situations when you will only .next() through
- * all terms; attempts to seek will hit an exception. */
+ * all terms; attempts to seek will hit an exception.
+ *
+ * <p>Takes effect immediately, but only applies to
+ * readers opened after this call. */
public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
if (divisor <= 0 && divisor != -1) {
throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
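Taken together, the new per-setter notes split the settings into construction-time ones ("Only takes effect when IndexWriter is first created") and live ones, which is why the fields above became volatile. A minimal usage sketch, mirroring the chained-setter example in the class javadoc and assuming an existing Directory dir and Analyzer analyzer:

    IndexWriterConfig conf = new IndexWriterConfig(analyzer)
        .setOpenMode(OpenMode.CREATE_OR_APPEND) // only honored when the writer is first created
        .setRAMBufferSizeMB(48.0);              // a "live" setting
    IndexWriter writer = new IndexWriter(dir, conf);
    // IndexWriter holds a private clone, so later changes go through getConfig();
    // this takes effect the next time a document is added, updated or deleted:
    writer.getConfig().setRAMBufferSizeMB(64.0);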
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java Mon May 9 13:19:28 2011
@@ -35,10 +35,4 @@ abstract class InvertedDocConsumer {
/** Attempt to free RAM, returning true if any RAM was
* freed */
abstract boolean freeRAM();
-
- FieldInfos fieldInfos;
-
- void setFieldInfos(FieldInfos fieldInfos) {
- this.fieldInfos = fieldInfos;
}
-}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java Mon May 9 13:19:28 2011
@@ -25,5 +25,4 @@ abstract class InvertedDocEndConsumer {
abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread);
abstract void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
abstract void abort();
- abstract void setFieldInfos(FieldInfos fieldInfos);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriter.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriter.java Mon May 9 13:19:28 2011
@@ -36,7 +36,6 @@ import org.apache.lucene.store.IndexOutp
final class NormsWriter extends InvertedDocEndConsumer {
- private FieldInfos fieldInfos;
@Override
public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
return new NormsWriterPerThread(docInverterPerThread, this);
@@ -48,11 +47,6 @@ final class NormsWriter extends Inverted
// We only write the _X.nrm file at flush
void files(Collection<String> files) {}
- @Override
- void setFieldInfos(FieldInfos fieldInfos) {
- this.fieldInfos = fieldInfos;
- }
-
/** Produce _X.nrm if any document had a field with norms
* not disabled */
@Override
@@ -60,7 +54,7 @@ final class NormsWriter extends Inverted
final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();
- if (!fieldInfos.hasNorms()) {
+ if (!state.fieldInfos.hasNorms()) {
return;
}
@@ -96,15 +90,10 @@ final class NormsWriter extends Inverted
try {
normsOut.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
- final int numField = fieldInfos.size();
-
int normCount = 0;
- for(int fieldNumber=0;fieldNumber<numField;fieldNumber++) {
-
- final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
-
- List<NormsWriterPerField> toMerge = byField.get(fieldInfo);
+ for (FieldInfo fi : state.fieldInfos) {
+ final List<NormsWriterPerField> toMerge = byField.get(fi);
int upto = 0;
if (toMerge != null) {
@@ -158,7 +147,7 @@ final class NormsWriter extends Inverted
// Fill final hole with defaultNorm
for(;upto<state.numDocs;upto++)
normsOut.writeByte((byte) 0);
- } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
+ } else if (fi.isIndexed && !fi.omitNorms) {
normCount++;
// Fill entire field with default norm:
for(;upto<state.numDocs;upto++)
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java Mon May 9 13:19:28 2011
@@ -67,7 +67,8 @@ final class PerFieldCodecWrapper extends
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
- final FieldsConsumer fields = consumers.get(field.codecId);
+ assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
+ final FieldsConsumer fields = consumers.get(field.getCodecId());
return fields.addField(field);
}
@@ -100,18 +101,17 @@ final class PerFieldCodecWrapper extends
public FieldsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo si,
int readBufferSize, int indexDivisor) throws IOException {
- final int fieldCount = fieldInfos.size();
final Map<Codec, FieldsProducer> producers = new HashMap<Codec, FieldsProducer>();
boolean success = false;
try {
- for (int i = 0; i < fieldCount; i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
+ for (FieldInfo fi : fieldInfos) {
if (fi.isIndexed) { // TODO this does not work for non-indexed fields
fields.add(fi.name);
- Codec codec = segmentCodecs.codecs[fi.codecId];
+ assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
+ Codec codec = segmentCodecs.codecs[fi.getCodecId()];
if (!producers.containsKey(codec)) {
producers.put(codec, codec.fieldsProducer(new SegmentReadState(dir,
- si, fieldInfos, readBufferSize, indexDivisor, ""+fi.codecId)));
+ si, fieldInfos, readBufferSize, indexDivisor, ""+fi.getCodecId())));
}
codecs.put(fi.name, producers.get(codec));
}
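For reference, the field-to-codec resolution this wrapper now asserts on reduces to the two provider calls visible in the hunks above; a one-line sketch, assuming a CodecProvider provider and a FieldInfo fi:

    // look up the codec name registered for this field, then resolve it to a Codec
    Codec fieldCodec = provider.lookup(provider.getFieldCodec(fi.name));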
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java?rev=1101016&r1=1101015&r2=1101016&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java Mon May 9 13:19:28 2011
@@ -38,17 +38,16 @@ import org.apache.lucene.store.IndexOutp
* {@link SegmentWriteState} for each flush and is maintained in the
* corresponding {@link SegmentInfo} until it is committed.
* <p>
- * {@link SegmentCodecs#build(FieldInfos, CodecProvider)} should be used to
- * create a {@link SegmentCodecs} instance during {@link IndexWriter} sessions
- * which creates the ordering of distinct codecs and assigns the
- * {@link FieldInfo#codecId} or in other words, the ord of the codec maintained
- * inside {@link SegmentCodecs}, to the {@link FieldInfo}. This ord is valid
- * only until the current segment is flushed and {@link FieldInfos} for that
- * segment are written including the ord for each field. This ord is later used
- * to get the right codec when the segment is opened in a reader. The
- * {@link Codec} returned from {@link SegmentCodecs#codec()} in turn uses
- * {@link SegmentCodecs} internal structure to select and initialize the right
- * codec for a fields when it is written.
+ * During indexing {@link FieldInfos} uses {@link SegmentCodecsBuilder} to incrementally
+ * build the {@link SegmentCodecs} mapping. Once a segment is flushed,
+ * DocumentsWriter creates a {@link SegmentCodecs} instance from
+ * {@link FieldInfos#buildSegmentCodecs(boolean)}. The {@link FieldInfo#codecId}
+ * assigned by {@link SegmentCodecsBuilder} refers to the codec's ordinal
+ * maintained inside {@link SegmentCodecs}. This ord is later used to get the
+ * right codec when the segment is opened in a reader. The {@link Codec} returned
+ * from {@link SegmentCodecs#codec()} in turn uses the {@link SegmentCodecs}
+ * internal structure to select and initialize the right codec for a field when
+ * it is written.
* <p>
* Once a flush succeeded the {@link SegmentCodecs} is maintained inside the
* {@link SegmentInfo} for the flushed segment it was created for.
@@ -64,38 +63,19 @@ final class SegmentCodecs implements Clo
* internal structure to map codecs to fields - don't modify this from outside
* of this class!
*/
- Codec[] codecs;
+ final Codec[] codecs;
final CodecProvider provider;
private final Codec codec = new PerFieldCodecWrapper(this);
-
+
+ SegmentCodecs(CodecProvider provider, IndexInput input) throws IOException {
+ this(provider, read(input, provider));
+ }
+
SegmentCodecs(CodecProvider provider, Codec... codecs) {
this.provider = provider;
this.codecs = codecs;
}
- static SegmentCodecs build(FieldInfos infos, CodecProvider provider) {
- final int size = infos.size();
- final Map<Codec, Integer> codecRegistry = new IdentityHashMap<Codec, Integer>();
- final ArrayList<Codec> codecs = new ArrayList<Codec>();
-
- for (int i = 0; i < size; i++) {
- final FieldInfo info = infos.fieldInfo(i);
- if (info.isIndexed) {
- final Codec fieldCodec = provider.lookup(provider
- .getFieldCodec(info.name));
- Integer ord = codecRegistry.get(fieldCodec);
- if (ord == null) {
- ord = Integer.valueOf(codecs.size());
- codecRegistry.put(fieldCodec, ord);
- codecs.add(fieldCodec);
- }
- info.codecId = ord.intValue();
- }
- }
- return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY));
-
- }
-
Codec codec() {
return codec;
}
@@ -107,7 +87,7 @@ final class SegmentCodecs implements Clo
}
}
- void read(IndexInput in) throws IOException {
+ private static Codec[] read(IndexInput in, CodecProvider provider) throws IOException {
final int size = in.readVInt();
final ArrayList<Codec> list = new ArrayList<Codec>();
for (int i = 0; i < size; i++) {
@@ -115,7 +95,7 @@ final class SegmentCodecs implements Clo
final Codec lookup = provider.lookup(codecName);
list.add(i, lookup);
}
- codecs = list.toArray(Codec.EMPTY);
+ return list.toArray(Codec.EMPTY);
}
void files(Directory dir, SegmentInfo info, Set<String> files)
@@ -131,4 +111,58 @@ final class SegmentCodecs implements Clo
public String toString() {
return "SegmentCodecs [codecs=" + Arrays.toString(codecs) + ", provider=" + provider + "]";
}
+
+ /**
+ * Used in {@link FieldInfos} to incrementally build the codec ID mapping for
+ * {@link FieldInfo} instances.
+ * <p>
+ * Note: this class is not thread-safe
+ * </p>
+ * @see FieldInfo#getCodecId()
+ */
+ final static class SegmentCodecsBuilder {
+ private final Map<Codec, Integer> codecRegistry = new IdentityHashMap<Codec, Integer>();
+ private final ArrayList<Codec> codecs = new ArrayList<Codec>();
+ private final CodecProvider provider;
+
+ private SegmentCodecsBuilder(CodecProvider provider) {
+ this.provider = provider;
+ }
+
+ static SegmentCodecsBuilder create(CodecProvider provider) {
+ return new SegmentCodecsBuilder(provider);
+ }
+
+ SegmentCodecsBuilder tryAddAndSet(FieldInfo fi) {
+ if (fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
+ final Codec fieldCodec = provider.lookup(provider
+ .getFieldCodec(fi.name));
+ Integer ord = codecRegistry.get(fieldCodec);
+ if (ord == null) {
+ ord = Integer.valueOf(codecs.size());
+ codecRegistry.put(fieldCodec, ord);
+ codecs.add(fieldCodec);
+ }
+ fi.setCodecId(ord.intValue());
+ }
+ return this;
+ }
+
+ SegmentCodecsBuilder addAll(FieldInfos infos) {
+ for (FieldInfo fieldInfo : infos) {
+ tryAddAndSet(fieldInfo);
+ }
+ return this;
+ }
+
+ SegmentCodecs build() {
+ return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY));
+ }
+
+ SegmentCodecsBuilder clear() {
+ codecRegistry.clear();
+ codecs.clear();
+ return this;
+ }
+ }
}
\ No newline at end of file
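SegmentCodecsBuilder is package-private and driven by FieldInfos during indexing, but its intended call sequence follows from the code added above. A minimal sketch, assuming a CodecProvider provider and an iterable FieldInfos infos:

    SegmentCodecsBuilder builder = SegmentCodecsBuilder.create(provider);
    for (FieldInfo fi : infos) {
      builder.tryAddAndSet(fi);  // assigns a codec ID only if fi is still unassigned
    }
    SegmentCodecs segmentCodecs = builder.build(); // ords now match each fi.getCodecId()

Since the builder chains, the same sequence collapses to SegmentCodecsBuilder.create(provider).addAll(infos).build().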