Posted to commits@lucene.apache.org by si...@apache.org on 2011/03/22 12:12:10 UTC
svn commit: r1084134 [1/2] - in /lucene/dev/trunk/lucene: ./
src/java/org/apache/lucene/index/ src/java/org/apache/lucene/index/codecs/
src/java/org/apache/lucene/index/codecs/preflex/
src/test-framework/org/apache/lucene/util/ src/test/org/apache/luce...
Author: simonw
Date: Tue Mar 22 11:12:09 2011
New Revision: 1084134
URL: http://svn.apache.org/viewvc?rev=1084134&view=rev
Log:
LUCENE-2881: Track global field numbers across IW-sessions
Added:
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java (with props)
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestGlobalFieldNumbers.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHash.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Mar 22 11:12:09 2011
@@ -162,6 +162,13 @@ Changes in Runtime Behavior
say "in order". If this is a problem then you can use either of the
LogMergePolicy impls, and call setRequireContiguousMerge(true).
(Mike McCandless)
+
+* LUCENE-2881: FieldInfos is now tracked per segment. Before it was tracked
+ per IndexWriter session, which resulted in FieldInfos that had the FieldInfo
+ properties from all previous segments combined. Field numbers are now tracked
+ globally across IndexWriter sessions and persisted into an X.fnx file on
+ successful commit. The corresponding file format changes are backwards-
+ compatible. (Michael Busch, Simon Willnauer)
API Changes
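To make the new scheme concrete, here is a minimal standalone sketch of the
name/number bi-map idea, modeled on the package-private FieldNumberBiMap this
commit adds below; the class and member names in the sketch are illustrative,
not Lucene API:

    import java.util.HashMap;
    import java.util.Map;

    // Sketch of FieldNumberBiMap#addOrGet: field names map to stable global
    // numbers; a preferred (previously used) number is reused when still
    // free, otherwise the lowest unassigned number is taken.
    class FieldNumberMapSketch {
      private final Map<Integer,String> numberToName = new HashMap<Integer,String>();
      private final Map<String,Integer> nameToNumber = new HashMap<String,Integer>();
      private int lowestUnassigned = -1;

      synchronized int addOrGet(String name, int preferred) {
        Integer num = nameToNumber.get(name);
        if (num == null) {
          if (preferred != -1 && !numberToName.containsKey(preferred)) {
            num = preferred; // keep the number this field had in older segments
          } else {
            while (numberToName.containsKey(++lowestUnassigned)) {
              // skip numbers already taken by other fields
            }
            num = lowestUnassigned;
          }
          numberToName.put(num, name);
          nameToNumber.put(name, num);
        }
        return num;
      }
    }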
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java Tue Mar 22 11:12:09 2011
@@ -27,7 +27,8 @@ abstract class DocConsumerPerThread {
* DocumentsWriter.DocWriter and return it.
* DocumentsWriter then calls finish() on this object
* when it's its turn. */
- abstract DocumentsWriter.DocWriter processDocument() throws IOException;
+ abstract DocumentsWriter.DocWriter processDocument(FieldInfos fieldInfos) throws IOException;
+ abstract void doAfterFlush();
abstract void abort();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java Tue Mar 22 11:12:09 2011
@@ -22,9 +22,6 @@ import java.util.Collection;
import java.util.Map;
abstract class DocFieldConsumer {
-
- FieldInfos fieldInfos;
-
/** Called when DocumentsWriter decides to create a new
* segment */
abstract void flush(Map<DocFieldConsumerPerThread,Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
@@ -39,8 +36,4 @@ abstract class DocFieldConsumer {
* The consumer should free RAM, if possible, returning
* true if any RAM was in fact freed. */
abstract boolean freeRAM();
-
- void setFieldInfos(FieldInfos fieldInfos) {
- this.fieldInfos = fieldInfos;
}
-}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Tue Mar 22 11:12:09 2011
@@ -34,16 +34,13 @@ import java.util.HashMap;
final class DocFieldProcessor extends DocConsumer {
final DocumentsWriter docWriter;
- final FieldInfos fieldInfos;
final DocFieldConsumer consumer;
final StoredFieldsWriter fieldsWriter;
public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) {
this.docWriter = docWriter;
this.consumer = consumer;
- fieldInfos = docWriter.getFieldInfos();
- consumer.setFieldInfos(fieldInfos);
- fieldsWriter = new StoredFieldsWriter(docWriter, fieldInfos);
+ fieldsWriter = new StoredFieldsWriter(docWriter);
}
@Override
@@ -53,7 +50,6 @@ final class DocFieldProcessor extends Do
for ( DocConsumerPerThread thread : threads) {
DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread;
childThreadsAndFields.put(perThread.consumer, perThread.fields());
- perThread.trimFields(state);
}
fieldsWriter.flush(state);
consumer.flush(childThreadsAndFields, state);
@@ -63,7 +59,7 @@ final class DocFieldProcessor extends Do
// FreqProxTermsWriter does this with
// FieldInfo.storePayload.
final String fileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELD_INFOS_EXTENSION);
- fieldInfos.write(state.directory, fileName);
+ state.fieldInfos.write(state.directory, fileName);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java Tue Mar 22 11:12:09 2011
@@ -41,14 +41,13 @@ final class DocFieldProcessorPerThread e
float docBoost;
int fieldGen;
final DocFieldProcessor docFieldProcessor;
- final FieldInfos fieldInfos;
final DocFieldConsumerPerThread consumer;
// Holds all fields seen in current doc
DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
int fieldCount;
- // Hash table for all fields ever seen
+ // Hash table for all fields seen in current segment
DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
int hashMask = 1;
int totalFieldCount;
@@ -60,7 +59,6 @@ final class DocFieldProcessorPerThread e
public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException {
this.docState = threadState.docState;
this.docFieldProcessor = docFieldProcessor;
- this.fieldInfos = docFieldProcessor.fieldInfos;
this.consumer = docFieldProcessor.consumer.addThread(this);
fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState);
}
@@ -75,6 +73,7 @@ final class DocFieldProcessorPerThread e
field = next;
}
}
+ doAfterFlush();
fieldsWriter.abort();
consumer.abort();
}
@@ -92,45 +91,15 @@ final class DocFieldProcessorPerThread e
return fields;
}
- /** If there are fields we've seen but did not see again
- * in the last run, then free them up. */
-
- void trimFields(SegmentWriteState state) {
-
- for(int i=0;i<fieldHash.length;i++) {
- DocFieldProcessorPerField perField = fieldHash[i];
- DocFieldProcessorPerField lastPerField = null;
-
- while (perField != null) {
-
- if (perField.lastGen == -1) {
-
- // This field was not seen since the previous
- // flush, so, free up its resources now
-
- // Unhash
- if (lastPerField == null)
- fieldHash[i] = perField.next;
- else
- lastPerField.next = perField.next;
-
- if (state.infoStream != null) {
- state.infoStream.println(" purge field=" + perField.fieldInfo.name);
+ /** On flush we reset the fieldHash so that per-field state is not
+ * maintained across segments */
+ @Override
+ void doAfterFlush() {
+ fieldHash = new DocFieldProcessorPerField[2];
+ hashMask = 1;
+ totalFieldCount = 0;
}
- totalFieldCount--;
-
- } else {
- // Reset
- perField.lastGen = -1;
- lastPerField = perField;
- }
-
- perField = perField.next;
- }
- }
- }
-
private void rehash() {
final int newHashSize = (fieldHash.length*2);
assert newHashSize > fieldHash.length;
@@ -155,7 +124,7 @@ final class DocFieldProcessorPerThread e
}
@Override
- public DocumentsWriter.DocWriter processDocument() throws IOException {
+ public DocumentsWriter.DocWriter processDocument(FieldInfos fieldInfos) throws IOException {
consumer.startDocument();
fieldsWriter.startDocument();
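The removed trimFields pruned fields not seen since the previous flush; with
per-segment FieldInfos the per-thread field hash is simply discarded after
each flush. A minimal sketch of that reset, with hypothetical names standing
in for the per-field chain:

    // Sketch only: per-thread field hash that is thrown away per segment.
    final class PerFieldSketch {
      String name;
      PerFieldSketch next; // hash-chain link, as in DocFieldProcessorPerField
    }

    final class FieldHashSketch {
      PerFieldSketch[] fieldHash = new PerFieldSketch[2];
      int hashMask = 1;
      int totalFieldCount;

      // Mirrors the new doAfterFlush(): instead of walking the chains and
      // unhashing stale fields (the old trimFields), drop all state so the
      // next segment starts with an empty table.
      void doAfterFlush() {
        fieldHash = new PerFieldSketch[2];
        hashMask = 1;
        totalFieldCount = 0;
      }
    }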
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverter.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverter.java Tue Mar 22 11:12:09 2011
@@ -40,13 +40,6 @@ final class DocInverter extends DocField
}
@Override
- void setFieldInfos(FieldInfos fieldInfos) {
- super.setFieldInfos(fieldInfos);
- consumer.setFieldInfos(fieldInfos);
- endConsumer.setFieldInfos(fieldInfos);
- }
-
- @Override
void flush(Map<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>>();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Tue Mar 22 11:12:09 2011
@@ -186,7 +186,6 @@ final class DocumentsWriter {
/**
* RAMFile buffer for DocWriters.
*/
- @SuppressWarnings("serial")
class PerDocBuffer extends RAMFile {
/**
@@ -270,12 +269,13 @@ final class DocumentsWriter {
private final IndexWriterConfig config;
private boolean closed;
- private final FieldInfos fieldInfos;
+ private FieldInfos fieldInfos;
private final BufferedDeletesStream bufferedDeletesStream;
private final IndexWriter.FlushControl flushControl;
- DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldInfos fieldInfos, BufferedDeletesStream bufferedDeletesStream) throws IOException {
+ DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, IndexingChain indexingChain, FieldInfos fieldInfos,
+ BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory;
this.writer = writer;
this.similarityProvider = config.getSimilarityProvider();
@@ -342,10 +342,6 @@ final class DocumentsWriter {
return doFlush;
}
- public FieldInfos getFieldInfos() {
- return fieldInfos;
- }
-
/** If non-null, various details of indexing are printed
* here. */
synchronized void setInfoStream(PrintStream infoStream) {
@@ -435,9 +431,14 @@ final class DocumentsWriter {
private void doAfterFlush() throws IOException {
// All ThreadStates should be idle when we are called
assert allThreadsIdle();
+ for (DocumentsWriterThreadState threadState : threadStates) {
+ threadState.consumer.doAfterFlush();
+ }
+
threadBindings.clear();
waitQueue.reset();
segment = null;
+ fieldInfos = new FieldInfos(fieldInfos);
numDocs = 0;
nextDocID = 0;
bufferIsFull = false;
@@ -555,7 +556,7 @@ final class DocumentsWriter {
pendingDeletes.docIDs.clear();
}
- newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false);
+ newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos);
Collection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
for (DocumentsWriterThreadState threadState : threadStates) {
@@ -749,7 +750,7 @@ final class DocumentsWriter {
// work
final DocWriter perDoc;
try {
- perDoc = state.consumer.processDocument();
+ perDoc = state.consumer.processDocument(fieldInfos);
} finally {
docState.clear();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java Tue Mar 22 11:12:09 2011
@@ -32,7 +32,7 @@ public final class FieldInfo {
public boolean omitTermFreqAndPositions;
public boolean storePayloads; // whether this field stores payloads together with term positions
- int codecId = 0; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
+ private int codecId = -1; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
@@ -57,10 +57,21 @@ public final class FieldInfo {
}
}
+ public void setCodecId(int codecId) {
+ assert this.codecId == -1 : "CodecId can only be set once.";
+ this.codecId = codecId;
+ }
+
+ public int getCodecId() {
+ return codecId;
+ }
+
@Override
public Object clone() {
- return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
+ FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ clone.codecId = this.codecId;
+ return clone;
}
void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector,
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java Tue Mar 22 11:12:09 2011
@@ -17,16 +17,23 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.StringHelper;
-import java.io.IOException;
-import java.util.*;
-
/** Access to the Fieldable Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Fieldable Info file. Objects
* of this class are thread-safe for multiple readers, but only one thread can
@@ -34,8 +41,159 @@ import java.util.*;
* accessing this object.
* @lucene.experimental
*/
-public final class FieldInfos {
+public final class FieldInfos implements Iterable<FieldInfo> {
+ static final class FieldNumberBiMap {
+
+ final static String CODEC_NAME = "GLOBAL_FIELD_MAP";
+
+ // Initial format
+ private static final int VERSION_START = 0;
+
+ private static final int VERSION_CURRENT = VERSION_START;
+
+ private final Map<Integer,String> numberToName;
+ private final Map<String,Integer> nameToNumber;
+ private int lowestUnassignedFieldNumber = -1;
+ private long lastVersion = 0;
+ private long version = 0;
+
+ FieldNumberBiMap() {
+ this.nameToNumber = new HashMap<String, Integer>();
+ this.numberToName = new HashMap<Integer, String>();
+ }
+
+ /**
+ * Returns the global field number for the given field name. If the name
+ * does not exist yet, it is added with the given preferred field number
+ * if that number is still unassigned; otherwise the first unassigned
+ * field number is used.
+ */
+ synchronized int addOrGet(String fieldName, int preferredFieldNumber) {
+ Integer fieldNumber = nameToNumber.get(fieldName);
+ if (fieldNumber == null) {
+ final Integer preferredBoxed = Integer.valueOf(preferredFieldNumber);
+
+ if (preferredFieldNumber != -1 && !numberToName.containsKey(preferredBoxed)) {
+ // cool - we can use this number globally
+ fieldNumber = preferredBoxed;
+ } else {
+ // find a new FieldNumber
+ while (numberToName.containsKey(++lowestUnassignedFieldNumber)) {
+ // might not be up to date - let's do the work when needed
+ }
+ fieldNumber = lowestUnassignedFieldNumber;
+ }
+
+ version++;
+ numberToName.put(fieldNumber, fieldName);
+ nameToNumber.put(fieldName, fieldNumber);
+
+ }
+
+ return fieldNumber.intValue();
+ }
+
+ /**
+ * Sets the given field number and name if not yet set.
+ */
+ synchronized void setIfNotSet(int fieldNumber, String fieldName) {
+ final Integer boxedFieldNumber = Integer.valueOf(fieldNumber);
+ if (!numberToName.containsKey(boxedFieldNumber)
+ && !nameToNumber.containsKey(fieldName)) {
+ version++;
+ numberToName.put(boxedFieldNumber, fieldName);
+ nameToNumber.put(fieldName, boxedFieldNumber);
+ } else {
+ assert containsConsistent(boxedFieldNumber, fieldName);
+ }
+ }
+
+ /**
+ * Writes this {@link FieldNumberBiMap} to the given output and returns its
+ * version.
+ */
+ public synchronized long write(IndexOutput output) throws IOException{
+ Set<Entry<String, Integer>> entrySet = nameToNumber.entrySet();
+ CodecUtil.writeHeader(output, CODEC_NAME, VERSION_CURRENT);
+ output.writeVInt(entrySet.size());
+ for (Entry<String, Integer> entry : entrySet) {
+ output.writeVInt(entry.getValue().intValue());
+ output.writeString(entry.getKey());
+ }
+ return version;
+ }
+ /**
+ * Reads the {@link FieldNumberBiMap} from the given input and resets the
+ * version to 0.
+ */
+ public synchronized void read(IndexInput input) throws IOException{
+ CodecUtil.checkHeader(input, CODEC_NAME,
+ VERSION_START,
+ VERSION_CURRENT);
+ final int size = input.readVInt();
+ for (int i = 0; i < size; i++) {
+ final int num = input.readVInt();
+ final String name = input.readString();
+ setIfNotSet(num, name);
+ }
+ version = lastVersion = 0;
+ }
+
+ /**
+ * Returns a new {@link FieldInfos} instance with this as the global field
+ * map
+ *
+ * @return a new {@link FieldInfos} instance with this as the global field
+ * map
+ */
+ public FieldInfos newFieldInfos() {
+ return new FieldInfos(this);
+ }
+
+ /**
+ * Returns <code>true</code> iff the last committed version differs from the
+ * current version, otherwise <code>false</code>
+ *
+ * @return <code>true</code> iff the last committed version differs from the
+ * current version, otherwise <code>false</code>
+ */
+ public synchronized boolean isDirty() {
+ return lastVersion != version;
+ }
+
+ /**
+ * Commits the given version if it is greater than the previously committed version
+ *
+ * @param version
+ * the version to commit
+ * @return <code>true</code> iff the version was successfully committed otherwise <code>false</code>
+ * @see #write(IndexOutput)
+ */
+ public synchronized boolean commitLastVersion(long version) {
+ if (version > lastVersion) {
+ lastVersion = version;
+ return true;
+ }
+ return false;
+ }
+
+ // just for testing
+ Set<Entry<String, Integer>> entries() {
+ return new HashSet<Entry<String, Integer>>(nameToNumber.entrySet());
+ }
+
+ // used by assert
+ boolean containsConsistent(Integer number, String name) {
+ return name.equals(numberToName.get(number))
+ && number.equals(nameToNumber.get(name));
+ }
+ }
+
+ private final SortedMap<Integer,FieldInfo> byNumber = new TreeMap<Integer,FieldInfo>();
+ private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
+ private final FieldNumberBiMap globalFieldNumbers;
+
// First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2;
public static final int FORMAT_PER_FIELD_CODEC = -3;
@@ -52,12 +210,19 @@ public final class FieldInfos {
static final byte OMIT_NORMS = 0x10;
static final byte STORE_PAYLOADS = 0x20;
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
-
- private final ArrayList<FieldInfo> byNumber = new ArrayList<FieldInfo>();
- private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
+
private int format;
public FieldInfos() {
+ this(new FieldNumberBiMap());
+ }
+
+ FieldInfos(FieldInfos other) {
+ this(other.globalFieldNumbers);
+ }
+
+ FieldInfos(FieldNumberBiMap globalFieldNumbers) {
+ this.globalFieldNumbers = globalFieldNumbers;
}
/**
@@ -68,6 +233,14 @@ public final class FieldInfos {
* @throws IOException
*/
public FieldInfos(Directory d, String name) throws IOException {
+ this(new FieldNumberBiMap());
+ /*
+ * TODO: in the read case we create a FNBM for each FIs, which is a waste of resources.
+ * Yet, we must not seed this with a global map since, due to addIndexes(Dir), we could
+ * have non-matching field numbers. We should use a null FNBM here and set the FIs
+ * to READ-ONLY once this ctor is done. Each modifying method should then assert
+ * that the instance is not read-only.
+ */
IndexInput input = d.openInput(name);
try {
read(input, name);
@@ -75,36 +248,45 @@ public final class FieldInfos {
input.close();
}
}
+
+ /**
+ * Adds the given field to this FieldInfos' name/number mapping. The given FI
+ * must be present in the global field number mapping before this method is
+ * called.
+ */
+ private void putInternal(FieldInfo fi) {
+ assert !byNumber.containsKey(fi.number);
+ assert !byName.containsKey(fi.name);
+ assert globalFieldNumbers.containsConsistent(Integer.valueOf(fi.number), fi.name);
+ byNumber.put(fi.number, fi);
+ byName.put(fi.name, fi);
+ }
+
+ private int nextFieldNumber(String name, int preferredFieldNumber) {
+ // get a global number for this field
+ final int fieldNumber = globalFieldNumbers.addOrGet(name,
+ preferredFieldNumber);
+ assert byNumber.get(fieldNumber) == null : "field number " + fieldNumber
+ + " already taken";
+ return fieldNumber;
+ }
/**
* Returns a deep clone of this FieldInfos instance.
*/
@Override
synchronized public Object clone() {
- FieldInfos fis = new FieldInfos();
- final int numField = byNumber.size();
- for(int i=0;i<numField;i++) {
- FieldInfo fi = (FieldInfo) ( byNumber.get(i)).clone();
- fis.byNumber.add(fi);
- fis.byName.put(fi.name, fi);
+ FieldInfos fis = new FieldInfos(globalFieldNumbers);
+ for (FieldInfo fi : this) {
+ FieldInfo clone = (FieldInfo) (fi).clone();
+ fis.putInternal(clone);
}
return fis;
}
- /** Adds field info for a Document. */
- synchronized public void add(Document doc) {
- List<Fieldable> fields = doc.getFields();
- for (Fieldable field : fields) {
- add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
- field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
- }
- }
-
/** Returns true if any fields do not omitTermFreqAndPositions */
public boolean hasProx() {
- final int numFields = byNumber.size();
- for(int i=0;i<numFields;i++) {
- final FieldInfo fi = fieldInfo(i);
+ for (FieldInfo fi : this) {
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
return true;
}
@@ -215,9 +397,18 @@ public final class FieldInfos {
synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+ return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
+ storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ }
+
+ synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
+ boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
+ boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+
FieldInfo fi = fieldInfo(name);
if (fi == null) {
- return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
+ return addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
} else {
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
}
@@ -225,20 +416,23 @@ public final class FieldInfos {
}
synchronized public FieldInfo add(FieldInfo fi) {
- return add(fi.name, fi.isIndexed, fi.storeTermVector,
+ // IMPORTANT - reuse the field number if possible for consistent field numbers across segments
+ return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
fi.omitNorms, fi.storePayloads,
fi.omitTermFreqAndPositions);
}
- private FieldInfo addInternal(String name, boolean isIndexed,
+ private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
name = StringHelper.intern(name);
- FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
+ globalFieldNumbers.setIfNotSet(fieldNumber, name);
+ FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
- byNumber.add(fi);
- byName.put(name, fi);
+
+ assert byNumber.get(fi.number) == null;
+ putInternal(fi);
return fi;
}
@@ -248,7 +442,7 @@ public final class FieldInfos {
}
public FieldInfo fieldInfo(String fieldName) {
- return byName.get(fieldName);
+ return byName.get(fieldName);
}
/**
@@ -273,13 +467,18 @@ public final class FieldInfos {
return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
}
+ public Iterator<FieldInfo> iterator() {
+ return byNumber.values().iterator();
+ }
+
public int size() {
+ assert byNumber.size() == byName.size();
return byNumber.size();
}
public boolean hasVectors() {
- for (int i = 0; i < size(); i++) {
- if (fieldInfo(i).storeTermVector) {
+ for (FieldInfo fi : this) {
+ if (fi.storeTermVector) {
return true;
}
}
@@ -287,8 +486,8 @@ public final class FieldInfos {
}
public boolean hasNorms() {
- for (int i = 0; i < size(); i++) {
- if (!fieldInfo(i).omitNorms) {
+ for (FieldInfo fi : this) {
+ if (!fi.omitNorms) {
return true;
}
}
@@ -307,8 +506,7 @@ public final class FieldInfos {
public void write(IndexOutput output) throws IOException {
output.writeVInt(FORMAT_CURRENT);
output.writeVInt(size());
- for (int i = 0; i < size(); i++) {
- FieldInfo fi = fieldInfo(i);
+ for (FieldInfo fi : this) {
byte bits = 0x0;
if (fi.isIndexed) bits |= IS_INDEXED;
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
@@ -318,7 +516,8 @@ public final class FieldInfos {
if (fi.storePayloads) bits |= STORE_PAYLOADS;
if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
output.writeString(fi.name);
- output.writeInt(fi.codecId);
+ output.writeInt(fi.number);
+ output.writeInt(fi.getCodecId());
output.writeByte(bits);
}
}
@@ -338,6 +537,7 @@ public final class FieldInfos {
for (int i = 0; i < size; i++) {
String name = StringHelper.intern(input.readString());
// if this is a previous format codec 0 will be preflex!
+ final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC? input.readInt():i;
final int codecId = format <= FORMAT_PER_FIELD_CODEC? input.readInt():0;
byte bits = input.readByte();
boolean isIndexed = (bits & IS_INDEXED) != 0;
@@ -347,8 +547,8 @@ public final class FieldInfos {
boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
- final FieldInfo addInternal = addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
- addInternal.codecId = codecId;
+ final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ addInternal.setCodecId(codecId);
}
if (input.getFilePointer() != input.length()) {
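A short worked example of the addOrGet semantics shown in this file, using the
illustrative FieldNumberMapSketch defined after the CHANGES entry above (not
Lucene API):

    FieldNumberMapSketch map = new FieldNumberMapSketch();
    int title = map.addOrGet("title", -1); // no preference: gets 0
    int body  = map.addOrGet("body", -1);  // gets 1
    // An older segment numbered "date" as 1, but 1 is taken globally,
    // so a fresh number is assigned rather than colliding:
    int date  = map.addOrGet("date", 1);   // gets 2
    // Looking up an existing field ignores the preferred number:
    assert map.addOrGet("body", 7) == body;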
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsWriter.java Tue Mar 22 11:12:09 2011
@@ -45,14 +45,12 @@ final class FieldsWriter {
// If null - we were supplied with streams, if not null - we manage them ourselves
private Directory directory;
private String segment;
- private FieldInfos fieldInfos;
private IndexOutput fieldsStream;
private IndexOutput indexStream;
- FieldsWriter(Directory directory, String segment, FieldInfos fn) throws IOException {
+ FieldsWriter(Directory directory, String segment) throws IOException {
this.directory = directory;
this.segment = segment;
- fieldInfos = fn;
boolean success = false;
try {
@@ -70,10 +68,9 @@ final class FieldsWriter {
}
}
- FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
+ FieldsWriter(IndexOutput fdx, IndexOutput fdt) {
directory = null;
segment = null;
- fieldInfos = fn;
fieldsStream = fdt;
indexStream = fdx;
}
@@ -166,7 +163,7 @@ final class FieldsWriter {
assert fieldsStream.getFilePointer() == position;
}
- final void addDocument(Document doc) throws IOException {
+ final void addDocument(Document doc, FieldInfos fieldInfos) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
int storedCount = 0;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexFileNames.java Tue Mar 22 11:12:09 2011
@@ -78,6 +78,10 @@ public final class IndexFileNames {
/** Extension of separate norms */
public static final String SEPARATE_NORMS_EXTENSION = "s";
+
+ /** Extension of global field numbers */
+ public static final String GLOBAL_FIELD_NUM_MAP_EXTENSION = "fnx";
+
/**
* This array contains all filename extensions used by
@@ -98,6 +102,7 @@ public final class IndexFileNames {
GEN_EXTENSION,
NORMS_EXTENSION,
COMPOUND_FILE_STORE_EXTENSION,
+ GLOBAL_FIELD_NUM_MAP_EXTENSION,
};
public static final String[] STORE_INDEX_EXTENSIONS = new String[] {
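Going by FieldNumberBiMap#write above, the new X.fnx file is just a codec
header followed by a vint entry count and (vint field number, field name)
pairs. A hedged reader sketch; FnxDumper is a made-up helper, while the store
and CodecUtil calls match those used in the patch:

    import java.io.IOException;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IndexInput;
    import org.apache.lucene.util.CodecUtil;

    // Illustrative dumper for the X.fnx layout written by FieldNumberBiMap:
    // header("GLOBAL_FIELD_MAP", version 0), vint entry count, then per
    // entry a vint field number followed by the field name.
    final class FnxDumper {
      static void dump(Directory dir, String fnxName) throws IOException {
        IndexInput in = dir.openInput(fnxName);
        try {
          CodecUtil.checkHeader(in, "GLOBAL_FIELD_MAP", 0, 0);
          final int size = in.readVInt();
          for (int i = 0; i < size; i++) {
            System.out.println(in.readVInt() + " -> " + in.readString());
          }
        } finally {
          in.close();
        }
      }
    }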
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java Tue Mar 22 11:12:09 2011
@@ -35,10 +35,10 @@ import java.util.concurrent.ConcurrentHa
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
@@ -221,6 +221,7 @@ public class IndexWriter implements Clos
volatile long pendingCommitChangeCount;
final SegmentInfos segmentInfos; // the segments
+ final FieldNumberBiMap globalFieldNumberMap;
private DocumentsWriter docWriter;
private IndexFileDeleter deleter;
@@ -786,7 +787,10 @@ public class IndexWriter implements Clos
setRollbackSegmentInfos(segmentInfos);
- docWriter = new DocumentsWriter(config, directory, this, getCurrentFieldInfos(), bufferedDeletesStream);
+ // start with previous field numbers, but new FieldInfos
+ globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
+ docWriter = new DocumentsWriter(config, directory, this, conf.getIndexingChain(),
+ globalFieldNumberMap.newFieldInfos(), bufferedDeletesStream);
docWriter.setInfoStream(infoStream);
// Default deleter (for backwards compatibility) is
@@ -825,48 +829,7 @@ public class IndexWriter implements Clos
}
}
}
-
- private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
- Directory cfsDir = null;
- try {
- if (info.getUseCompoundFile()) {
- cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
- } else {
- cfsDir = directory;
- }
- return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, "", IndexFileNames.FIELD_INFOS_EXTENSION));
- } finally {
- if (info.getUseCompoundFile() && cfsDir != null) {
- cfsDir.close();
- }
- }
- }
-
- private FieldInfos getCurrentFieldInfos() throws IOException {
- final FieldInfos fieldInfos;
- if (segmentInfos.size() > 0) {
- if (segmentInfos.getFormat() > DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
- // Pre-4.0 index. In this case we sweep all
- // segments, merging their FieldInfos:
- fieldInfos = new FieldInfos();
- for(SegmentInfo info : segmentInfos) {
- final FieldInfos segFieldInfos = getFieldInfos(info);
- final int fieldCount = segFieldInfos.size();
- for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
- fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
- }
- }
- } else {
- // Already a 4.0 index; just seed the FieldInfos
- // from the last segment
- fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
- }
- } else {
- fieldInfos = new FieldInfos();
- }
- return fieldInfos;
- }
-
+
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
}
@@ -2154,7 +2117,7 @@ public class IndexWriter implements Clos
docCount += info.docCount;
String newSegName = newSegmentName();
String dsName = info.getDocStoreSegment();
-
+
if (infoStream != null) {
message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
}
@@ -2245,16 +2208,17 @@ public class IndexWriter implements Clos
String mergedName = newSegmentName();
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
mergedName, null, codecs, payloadProcessorProvider,
- ((FieldInfos) docWriter.getFieldInfos().clone()));
+ globalFieldNumberMap.newFieldInfos());
for (IndexReader reader : readers) // add new indexes
merger.add(reader);
int docCount = merger.merge(); // merge 'em
-
+ final FieldInfos fieldInfos = merger.fieldInfos();
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
- false, merger.fieldInfos().hasProx(), merger.getSegmentCodecs(),
- merger.fieldInfos().hasVectors());
+ false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
+ fieldInfos.hasVectors(),
+ fieldInfos);
setDiagnostics(info, "addIndexes(IndexReader...)");
boolean useCompoundFile;
@@ -2987,7 +2951,7 @@ public class IndexWriter implements Clos
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
- merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
+ merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos());
// Lock order: IW -> BD
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
@@ -3138,7 +3102,7 @@ public class IndexWriter implements Clos
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
codecs, payloadProcessorProvider,
- ((FieldInfos) docWriter.getFieldInfos().clone()));
+ merge.info.getFieldInfos());
if (infoStream != null) {
message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
@@ -3147,8 +3111,6 @@ public class IndexWriter implements Clos
merge.readers = new ArrayList<SegmentReader>();
merge.readerClones = new ArrayList<SegmentReader>();
- merge.info.setHasVectors(merger.fieldInfos().hasVectors());
-
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
@@ -3190,6 +3152,8 @@ public class IndexWriter implements Clos
// Record which codec was used to write the segment
merge.info.setSegmentCodecs(merger.getSegmentCodecs());
+ // Record if we have merged vectors
+ merge.info.setHasVectors(merger.fieldInfos().hasVectors());
if (infoStream != null) {
message("merge segmentCodecs=" + merger.getSegmentCodecs());
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java Tue Mar 22 11:12:09 2011
@@ -35,10 +35,4 @@ abstract class InvertedDocConsumer {
/** Attempt to free RAM, returning true if any RAM was
* freed */
abstract boolean freeRAM();
-
- FieldInfos fieldInfos;
-
- void setFieldInfos(FieldInfos fieldInfos) {
- this.fieldInfos = fieldInfos;
}
-}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java Tue Mar 22 11:12:09 2011
@@ -25,5 +25,4 @@ abstract class InvertedDocEndConsumer {
abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread);
abstract void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
abstract void abort();
- abstract void setFieldInfos(FieldInfos fieldInfos);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriter.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriter.java Tue Mar 22 11:12:09 2011
@@ -36,7 +36,6 @@ import org.apache.lucene.store.IndexOutp
final class NormsWriter extends InvertedDocEndConsumer {
- private FieldInfos fieldInfos;
@Override
public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
return new NormsWriterPerThread(docInverterPerThread, this);
@@ -48,11 +47,6 @@ final class NormsWriter extends Inverted
// We only write the _X.nrm file at flush
void files(Collection<String> files) {}
- @Override
- void setFieldInfos(FieldInfos fieldInfos) {
- this.fieldInfos = fieldInfos;
- }
-
/** Produce _X.nrm if any document had a field with norms
* not disabled */
@Override
@@ -60,7 +54,7 @@ final class NormsWriter extends Inverted
final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();
- if (!fieldInfos.hasNorms()) {
+ if (!state.fieldInfos.hasNorms()) {
return;
}
@@ -96,15 +90,10 @@ final class NormsWriter extends Inverted
try {
normsOut.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
- final int numField = fieldInfos.size();
-
int normCount = 0;
- for(int fieldNumber=0;fieldNumber<numField;fieldNumber++) {
-
- final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
-
- List<NormsWriterPerField> toMerge = byField.get(fieldInfo);
+ for (FieldInfo fi : state.fieldInfos) {
+ final List<NormsWriterPerField> toMerge = byField.get(fi);
int upto = 0;
if (toMerge != null) {
@@ -158,7 +147,7 @@ final class NormsWriter extends Inverted
// Fill final hole with defaultNorm
for(;upto<state.numDocs;upto++)
normsOut.writeByte((byte) 0);
- } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
+ } else if (fi.isIndexed && !fi.omitNorms) {
normCount++;
// Fill entire field with default norm:
for(;upto<state.numDocs;upto++)
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java Tue Mar 22 11:12:09 2011
@@ -67,7 +67,7 @@ final class PerFieldCodecWrapper extends
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
- final FieldsConsumer fields = consumers.get(field.codecId);
+ final FieldsConsumer fields = consumers.get(field.getCodecId());
return fields.addField(field);
}
@@ -100,18 +100,16 @@ final class PerFieldCodecWrapper extends
public FieldsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo si,
int readBufferSize, int indexDivisor) throws IOException {
- final int fieldCount = fieldInfos.size();
final Map<Codec, FieldsProducer> producers = new HashMap<Codec, FieldsProducer>();
boolean success = false;
try {
- for (int i = 0; i < fieldCount; i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
+ for (FieldInfo fi : fieldInfos) {
if (fi.isIndexed) { // TODO this does not work for non-indexed fields
fields.add(fi.name);
- Codec codec = segmentCodecs.codecs[fi.codecId];
+ Codec codec = segmentCodecs.codecs[fi.getCodecId()];
if (!producers.containsKey(codec)) {
producers.put(codec, codec.fieldsProducer(new SegmentReadState(dir,
- si, fieldInfos, readBufferSize, indexDivisor, ""+fi.codecId)));
+ si, fieldInfos, readBufferSize, indexDivisor, ""+fi.getCodecId())));
}
codecs.put(fi.name, producers.get(codec));
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java Tue Mar 22 11:12:09 2011
@@ -74,22 +74,20 @@ final class SegmentCodecs implements Clo
}
static SegmentCodecs build(FieldInfos infos, CodecProvider provider) {
- final int size = infos.size();
final Map<Codec, Integer> codecRegistry = new IdentityHashMap<Codec, Integer>();
final ArrayList<Codec> codecs = new ArrayList<Codec>();
- for (int i = 0; i < size; i++) {
- final FieldInfo info = infos.fieldInfo(i);
- if (info.isIndexed) {
+ for (FieldInfo fi : infos) {
+ if (fi.isIndexed) {
final Codec fieldCodec = provider.lookup(provider
- .getFieldCodec(info.name));
+ .getFieldCodec(fi.name));
Integer ord = codecRegistry.get(fieldCodec);
if (ord == null) {
ord = Integer.valueOf(codecs.size());
codecRegistry.put(fieldCodec, ord);
codecs.add(fieldCodec);
}
- info.codecId = ord.intValue();
+ fi.setCodecId(ord.intValue());
}
}
return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY));
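The loop above assigns each indexed field the ordinal of its codec within this
segment, which is what FieldInfo.getCodecId() later returns. A minimal generic
sketch of that ordinal registry (names illustrative):

    import java.util.ArrayList;
    import java.util.IdentityHashMap;
    import java.util.List;
    import java.util.Map;

    // Sketch of the ordinal assignment in SegmentCodecs.build(): each
    // distinct codec instance gets one ordinal per segment, and fields
    // record that ordinal as their codec id.
    final class OrdinalRegistry<C> {
      private final Map<C,Integer> registry = new IdentityHashMap<C,Integer>();
      private final List<C> values = new ArrayList<C>();

      int ordinalFor(C value) {
        Integer ord = registry.get(value);
        if (ord == null) {
          ord = Integer.valueOf(values.size()); // next unused ordinal
          registry.put(value, ord);
          values.add(value);
        }
        return ord.intValue();
      }
    }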
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Tue Mar 22 11:12:09 2011
@@ -17,21 +17,22 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.Constants;
-import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import java.io.IOException;
-import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
-import java.util.HashSet;
-import java.util.HashMap;
-import java.util.ArrayList;
+
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Constants;
/**
* Information about a segment such as its name, directory, and files related
@@ -62,7 +63,7 @@ public final class SegmentInfo {
* - NO says this field has no separate norms
* >= YES says this field has separate norms with the specified generation
*/
- private long[] normGen;
+ private Map<Integer,Long> normGen;
private boolean isCompoundFile;
@@ -83,6 +84,8 @@ public final class SegmentInfo {
private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
private boolean hasVectors; // True if this segment wrote term vectors
+
+ private FieldInfos fieldInfos;
private SegmentCodecs segmentCodecs;
@@ -100,7 +103,7 @@ public final class SegmentInfo {
private long bufferedDeletesGen;
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
- boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors) {
+ boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
this.name = name;
this.docCount = docCount;
this.dir = dir;
@@ -113,13 +116,14 @@ public final class SegmentInfo {
this.hasVectors = hasVectors;
delCount = 0;
version = Constants.LUCENE_MAIN_VERSION;
+ this.fieldInfos = fieldInfos;
}
/**
* Copy everything from src SegmentInfo into our instance.
*/
void reset(SegmentInfo src) {
- clearFiles();
+ clearFilesCache();
version = src.version;
name = src.name;
docCount = src.docCount;
@@ -130,11 +134,14 @@ public final class SegmentInfo {
docStoreIsCompoundFile = src.docStoreIsCompoundFile;
hasVectors = src.hasVectors;
hasProx = src.hasProx;
+ fieldInfos = src.fieldInfos == null ? null : (FieldInfos) src.fieldInfos.clone();
if (src.normGen == null) {
normGen = null;
} else {
- normGen = new long[src.normGen.length];
- System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
+ normGen = new HashMap<Integer, Long>(src.normGen.size());
+ for (Entry<Integer,Long> entry : src.normGen.entrySet()) {
+ normGen.put(entry.getKey(), entry.getValue());
+ }
}
isCompoundFile = src.isCompoundFile;
delCount = src.delCount;
@@ -184,9 +191,14 @@ public final class SegmentInfo {
if (numNormGen == NO) {
normGen = null;
} else {
- normGen = new long[numNormGen];
+ normGen = new HashMap<Integer, Long>();
for(int j=0;j<numNormGen;j++) {
- normGen[j] = input.readLong();
+ int fieldNumber = j;
+ if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
+ fieldNumber = input.readInt();
+ }
+
+ normGen.put(fieldNumber, input.readLong());
}
}
isCompoundFile = input.readByte() == YES;
@@ -237,6 +249,24 @@ public final class SegmentInfo {
}
}
}
+
+ synchronized void loadFieldInfos(Directory dir, boolean checkCompoundFile) throws IOException {
+ if (fieldInfos == null) {
+ Directory dir0 = dir;
+ if (isCompoundFile && checkCompoundFile) {
+ dir0 = new CompoundFileReader(dir, IndexFileNames.segmentFileName(name,
+ "", IndexFileNames.COMPOUND_FILE_EXTENSION));
+ }
+ try {
+ fieldInfos = new FieldInfos(dir0, IndexFileNames.segmentFileName(name,
+ "", IndexFileNames.FIELD_INFOS_EXTENSION));
+ } finally {
+ if (dir != dir0) {
+ dir0.close();
+ }
+ }
+ }
+ }
/**
* Returns total size in bytes of all of files used by this segment (if
@@ -280,7 +310,12 @@ public final class SegmentInfo {
public void setHasVectors(boolean v) {
hasVectors = v;
- clearFiles();
+ clearFilesCache();
+ }
+
+ public FieldInfos getFieldInfos() throws IOException {
+ loadFieldInfos(dir, true);
+ return fieldInfos;
}
public boolean hasDeletions() {
@@ -298,17 +333,18 @@ public final class SegmentInfo {
} else {
delGen++;
}
- clearFiles();
+ clearFilesCache();
}
void clearDelGen() {
delGen = NO;
- clearFiles();
+ clearFilesCache();
}
@Override
public Object clone() {
- SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, hasProx, segmentCodecs, false);
+ final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, hasProx, segmentCodecs, hasVectors,
+ fieldInfos == null ? null : (FieldInfos) fieldInfos.clone());
si.docStoreOffset = docStoreOffset;
si.docStoreSegment = docStoreSegment;
si.docStoreIsCompoundFile = docStoreIsCompoundFile;
@@ -316,9 +352,11 @@ public final class SegmentInfo {
si.delCount = delCount;
si.diagnostics = new HashMap<String, String>(diagnostics);
if (normGen != null) {
- si.normGen = normGen.clone();
+ si.normGen = new HashMap<Integer, Long>();
+ for (Entry<Integer,Long> entry : normGen.entrySet()) {
+ si.normGen.put(entry.getKey(), entry.getValue());
+ }
}
- si.hasVectors = hasVectors;
si.version = version;
return si;
}
@@ -339,7 +377,12 @@ public final class SegmentInfo {
* @param fieldNumber the field index to check
*/
public boolean hasSeparateNorms(int fieldNumber) {
- return normGen != null && normGen[fieldNumber] != NO;
+ if (normGen == null) {
+ return false;
+ }
+
+ Long gen = normGen.get(fieldNumber);
+ return gen != null && gen.longValue() != NO;
}
/**
@@ -349,7 +392,7 @@ public final class SegmentInfo {
if (normGen == null) {
return false;
} else {
- for (long fieldNormGen : normGen) {
+ for (long fieldNormGen : normGen.values()) {
if (fieldNormGen >= YES) {
return true;
}
@@ -359,10 +402,9 @@ public final class SegmentInfo {
return false;
}
- void initNormGen(int numFields) {
+ void initNormGen() {
if (normGen == null) { // normGen is null if this segment's file hasn't had any norms set against it yet
- normGen = new long[numFields];
- Arrays.fill(normGen, NO);
+ normGen = new HashMap<Integer, Long>();
}
}
@@ -373,12 +415,13 @@ public final class SegmentInfo {
* @param fieldIndex field whose norm file will be rewritten
*/
void advanceNormGen(int fieldIndex) {
- if (normGen[fieldIndex] == NO) {
- normGen[fieldIndex] = YES;
+ Long gen = normGen.get(fieldIndex);
+ if (gen == null || gen.longValue() == NO) {
+ normGen.put(fieldIndex, new Long(YES));
} else {
- normGen[fieldIndex]++;
+ normGen.put(fieldIndex, gen+1);
}
- clearFiles();
+ clearFilesCache();
}
/**
@@ -388,7 +431,7 @@ public final class SegmentInfo {
*/
public String getNormFileName(int number) {
if (hasSeparateNorms(number)) {
- return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen[number]);
+ return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
} else {
// single file for all norms
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
@@ -403,7 +446,7 @@ public final class SegmentInfo {
*/
void setUseCompoundFile(boolean isCompoundFile) {
this.isCompoundFile = isCompoundFile;
- clearFiles();
+ clearFilesCache();
}
/**
@@ -433,7 +476,7 @@ public final class SegmentInfo {
void setDocStoreIsCompoundFile(boolean v) {
docStoreIsCompoundFile = v;
- clearFiles();
+ clearFilesCache();
}
public String getDocStoreSegment() {
@@ -446,14 +489,14 @@ public final class SegmentInfo {
void setDocStoreOffset(int offset) {
docStoreOffset = offset;
- clearFiles();
+ clearFilesCache();
}
void setDocStore(int offset, String segment, boolean isCompoundFile) {
docStoreOffset = offset;
docStoreSegment = segment;
docStoreIsCompoundFile = isCompoundFile;
- clearFiles();
+ clearFilesCache();
}
/** Save this segment's info. */
@@ -474,9 +517,10 @@ public final class SegmentInfo {
if (normGen == null) {
output.writeInt(NO);
} else {
- output.writeInt(normGen.length);
- for (long fieldNormGen : normGen) {
- output.writeLong(fieldNormGen);
+ output.writeInt(normGen.size());
+ for (Entry<Integer,Long> entry : normGen.entrySet()) {
+ output.writeInt(entry.getKey());
+ output.writeLong(entry.getValue());
}
}
@@ -490,7 +534,7 @@ public final class SegmentInfo {
void setHasProx(boolean hasProx) {
this.hasProx = hasProx;
- clearFiles();
+ clearFilesCache();
}
public boolean getHasProx() {
@@ -572,11 +616,11 @@ public final class SegmentInfo {
}
if (normGen != null) {
- for (int i = 0; i < normGen.length; i++) {
- long gen = normGen[i];
+ for (Entry<Integer,Long> entry : normGen.entrySet()) {
+ long gen = entry.getValue();
if (gen >= YES) {
// Definitely a separate norm file, with generation:
- fileSet.add(IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
+ fileSet.add(IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + entry.getKey(), gen));
}
}
}
@@ -588,7 +632,7 @@ public final class SegmentInfo {
/* Called whenever any change is made that affects which
* files this segment has. */
- private void clearFiles() {
+ private void clearFilesCache() {
files = null;
sizeInBytesNoStore = -1;
sizeInBytesWithStore = -1;
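A minimal standalone sketch of the sparse norm-generation bookkeeping the SegmentInfo changes above introduce: the dense long[] normGen (indexed by field position) becomes a Map keyed by field number, since globally assigned field numbers need not be contiguous within a segment. The class name, main() method, and sentinel values below are illustrative only, not the actual Lucene API:

    import java.util.HashMap;
    import java.util.Map;

    class NormGenSketch {
      static final long NO = -1;  // field has no separate norms
      static final long YES = 1;  // first separate-norms generation

      // lazily created, mirroring initNormGen() above
      private Map<Integer, Long> normGen;

      boolean hasSeparateNorms(int fieldNumber) {
        if (normGen == null) {
          return false;
        }
        Long gen = normGen.get(fieldNumber);
        return gen != null && gen.longValue() != NO;
      }

      void advanceNormGen(int fieldNumber) {
        if (normGen == null) {
          normGen = new HashMap<Integer, Long>();
        }
        Long gen = normGen.get(fieldNumber);
        normGen.put(fieldNumber, (gen == null || gen.longValue() == NO) ? YES : gen + 1);
      }

      public static void main(String[] args) {
        NormGenSketch si = new NormGenSketch();
        si.advanceNormGen(7);                       // field 7 gets generation YES
        si.advanceNormGen(7);                       // bumped to 2
        System.out.println(si.hasSeparateNorms(7)); // true
        System.out.println(si.hasSeparateNorms(3)); // false: an absent key means NO
      }
    }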
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Tue Mar 22 11:12:09 2011
@@ -17,25 +17,27 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.NoSuchDirectoryException;
-import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.SegmentInfosReader;
-import org.apache.lucene.index.codecs.SegmentInfosWriter;
-import org.apache.lucene.util.ThreadInterruptedException;
-
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
-import java.util.Vector;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.HashSet;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Map;
+import java.util.Vector;
+
+import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.index.codecs.SegmentInfosReader;
+import org.apache.lucene.index.codecs.SegmentInfosWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.NoSuchDirectoryException;
+import org.apache.lucene.util.ThreadInterruptedException;
/**
* A collection of segmentInfo objects with methods for operating on
@@ -64,6 +66,11 @@ public final class SegmentInfos extends
* starting with the current time in milliseconds forces to create unique version numbers.
*/
public long version = System.currentTimeMillis();
+
+ private long globalFieldMapVersion = 0; // version of the global field number map (GFNM) for the next commit
+ private long lastGlobalFieldMapVersion = 0; // version of the GFNM file we last successfully read or wrote
+ private long pendingMapVersion = -1; // version of the GFNM we have last successfully written,
+ // or -1 if it was not written. This is set during prepareCommit
private long generation = 0; // generation of the "segments_N" for the next commit
private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
@@ -75,6 +82,8 @@ public final class SegmentInfos extends
private CodecProvider codecs;
private int format;
+
+ private FieldNumberBiMap globalFieldNumberMap; // this segments global field number map - lazy loaded on demand
/**
* If non-null, information about loading segments_N files
@@ -171,6 +180,15 @@ public final class SegmentInfos extends
"",
lastGeneration);
}
+
+ private String getGlobalFieldNumberName(long version) {
+ /*
+ * This creates a file name ${version}.fnx without a leading underscore
+ * since this file might belong to more than one segment (global map) and
+ * could otherwise easily be confused with a per-segment file.
+ */
return IndexFileNames.segmentFileName("" + version, "", IndexFileNames.GLOBAL_FIELD_NUM_MAP_EXTENSION);
+ }
/**
* Parse the generation off the segments file name and
@@ -261,6 +279,8 @@ public final class SegmentInfos extends
return null;
}
}.run();
+ // either we are on 4.0, or we don't have a lastGlobalFieldMapVersion, i.e. it's still set to 0
+ assert DefaultSegmentInfosWriter.FORMAT_4_0 <= format || (DefaultSegmentInfosWriter.FORMAT_4_0 > format && lastGlobalFieldMapVersion == 0);
}
// Only non-null after prepareCommit has been called and
@@ -270,15 +290,24 @@ public final class SegmentInfos extends
private void write(Directory directory) throws IOException {
String segmentFileName = getNextSegmentFileName();
-
+ final String globalFieldMapFile;
+ if (globalFieldNumberMap != null && globalFieldNumberMap.isDirty()) {
+ globalFieldMapFile = getGlobalFieldNumberName(++globalFieldMapVersion);
+ pendingMapVersion = writeGlobalFieldMap(globalFieldNumberMap, directory, globalFieldMapFile);
+ } else {
+ globalFieldMapFile = null;
+ }
+
+
// Always advance the generation on write:
if (generation == -1) {
generation = 1;
} else {
generation++;
}
-
+
IndexOutput segnOutput = null;
+
boolean success = false;
@@ -304,6 +333,16 @@ public final class SegmentInfos extends
} catch (Throwable t) {
// Suppress so we keep throwing the original exception
}
+ if (globalFieldMapFile != null) { // delete if written here
+ try {
+ // Try not to leave global field map in
+ // the index:
+ directory.deleteFile(globalFieldMapFile);
+ } catch (Throwable t) {
+ // Suppress so we keep throwing the original exception
+ }
+ }
+ pendingMapVersion = -1;
}
}
}
@@ -719,6 +758,8 @@ public final class SegmentInfos extends
void updateGeneration(SegmentInfos other) {
lastGeneration = other.lastGeneration;
generation = other.generation;
+ lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion;
+ globalFieldMapVersion = other.globalFieldMapVersion;
}
final void rollbackCommit(Directory dir) throws IOException {
@@ -742,6 +783,16 @@ public final class SegmentInfos extends
// in our caller
}
pendingSegnOutput = null;
+ if (pendingMapVersion != -1) {
+ try {
+ final String fieldMapName = getGlobalFieldNumberName(globalFieldMapVersion--);
+ dir.deleteFile(fieldMapName);
+ } catch (Throwable t) {
+ // Suppress so we keep throwing the original exception
+ // in our caller
+ }
+ pendingMapVersion = -1;
+ }
}
}
@@ -760,6 +811,40 @@ public final class SegmentInfos extends
throw new IllegalStateException("prepareCommit was already called");
write(dir);
}
+
+ private final long writeGlobalFieldMap(FieldNumberBiMap map, Directory dir, String name) throws IOException {
+ final IndexOutput output = dir.createOutput(name);
+ boolean success = false;
+ long version;
+ try {
+ version = map.write(output);
+ success = true;
+ } finally {
+ try {
+ output.close();
+ } catch (Throwable t) {
+ // Suppress so we keep throwing the original exception
+ }
+ if (!success) {
+ try {
+ dir.deleteFile(name);
+ } catch (Throwable t) {
+ // Suppress so we keep throwing the original exception
+ }
+ }
+ }
+ return version;
+ }
+
+ private void readGlobalFieldMap(FieldNumberBiMap map, Directory dir) throws IOException {
+ final String name = getGlobalFieldNumberName(lastGlobalFieldMapVersion);
+ final IndexInput input = dir.openInput(name);
+ try {
+ map.read(input);
+ } finally {
+ input.close();
+ }
+ }
/** Returns all file names referenced by SegmentInfo
* instances matching the provided Directory (ie files
@@ -769,7 +854,17 @@ public final class SegmentInfos extends
public Collection<String> files(Directory dir, boolean includeSegmentsFile) throws IOException {
HashSet<String> files = new HashSet<String>();
if (includeSegmentsFile) {
- files.add(getCurrentSegmentFileName());
+ final String segmentFileName = getCurrentSegmentFileName();
+ if (segmentFileName != null) {
+ /*
+ * TODO: if lastGen == -1 we might get null here; it seems wrong to
+ * add null to the files set
+ */
+ files.add(segmentFileName);
+ }
+ if (lastGlobalFieldMapVersion > 0) {
+ files.add(getGlobalFieldNumberName(lastGlobalFieldMapVersion));
+ }
}
final int size = size();
for(int i=0;i<size;i++) {
@@ -821,6 +916,17 @@ public final class SegmentInfos extends
}
lastGeneration = generation;
+ if (pendingMapVersion != -1) {
+ /*
+ * TODO: is it possible that the commit does not succeed here? If another
+ * commit happens at the same time and we lose the race between
+ * prepareCommit and finishCommit, the latest version is already
+ * incremented.
+ */
+ globalFieldNumberMap.commitLastVersion(pendingMapVersion);
+ pendingMapVersion = -1;
+ lastGlobalFieldMapVersion = globalFieldMapVersion;
+ }
try {
IndexOutput genOutput = dir.createOutput(IndexFileNames.SEGMENTS_GEN);
@@ -848,6 +954,7 @@ public final class SegmentInfos extends
prepareCommit(dir);
finishCommit(dir);
}
+
public synchronized String toString(Directory directory) {
StringBuilder buffer = new StringBuilder();
@@ -883,6 +990,8 @@ public final class SegmentInfos extends
clear();
addAll(other);
lastGeneration = other.lastGeneration;
+ lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion;
+ format = other.format;
}
/** Returns sum of all segment's docCounts. Note that
@@ -900,4 +1009,49 @@ public final class SegmentInfos extends
public void changed() {
version++;
}
+
+ /**
+ * Loads or returns the already loaded global field number map for this {@link SegmentInfos}.
+ * If this {@link SegmentInfos} has no global field number map, the returned instance is empty.
+ */
+ synchronized FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
+ if (globalFieldNumberMap != null) {
+ return globalFieldNumberMap;
+ }
+ final FieldNumberBiMap map = new FieldNumberBiMap();
+
+ if (lastGlobalFieldMapVersion > 0) {
+ // if we don't have a global map, or this is a SI from an earlier version, we just return the empty map
+ readGlobalFieldMap(map, dir);
+ }
+ if (size() > 0) {
+ if (format > DefaultSegmentInfosWriter.FORMAT_4_0) {
+ assert lastGlobalFieldMapVersion == 0;
+ // build up the map if we open a pre-4.0 index
+ for (SegmentInfo info : this) {
+ final FieldInfos segFieldInfos = info.getFieldInfos();
+ for (FieldInfo fi : segFieldInfos) {
+ map.addOrGet(fi.name, fi.number);
+ }
+ }
+ }
+ }
+ return globalFieldNumberMap = map;
+ }
+
+ /**
+ * Called by {@link SegmentInfosReader} when reading the global field map version
+ */
+ public void setGlobalFieldMapVersion(long version) {
+ lastGlobalFieldMapVersion = globalFieldMapVersion = version;
+ }
+
+ public long getGlobalFieldMapVersion() {
+ return globalFieldMapVersion;
+ }
+
+ // for testing
+ long getLastGlobalFieldMapVersion() {
+ return lastGlobalFieldMapVersion;
+ }
}
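The .fnx handling added above follows the same two-phase commit protocol as segments_N: write() creates ${version}.fnx only when the global map is dirty, finishCommit() promotes the pending version, and rollbackCommit() deletes the file and rolls the counter back. The standalone sketch below shows just that version bookkeeping with hypothetical names; file I/O is elided, and it collapses the map's internal version (which the real patch records in pendingMapVersion via FieldNumberBiMap.write) into the file-version counter:

    class FieldMapCommitSketch {
      private long globalFieldMapVersion = 0;     // next version to write
      private long lastGlobalFieldMapVersion = 0; // last committed version
      private long pendingMapVersion = -1;        // -1: nothing pending

      void prepareCommit(boolean mapIsDirty) {
        if (mapIsDirty) {
          pendingMapVersion = ++globalFieldMapVersion;
          // write "<pendingMapVersion>.fnx" here
        }
      }

      void finishCommit() {
        if (pendingMapVersion != -1) {
          lastGlobalFieldMapVersion = globalFieldMapVersion;
          pendingMapVersion = -1;                 // the new .fnx is now live
        }
      }

      void rollbackCommit() {
        if (pendingMapVersion != -1) {
          // delete "<pendingMapVersion>.fnx" here
          globalFieldMapVersion--;
          pendingMapVersion = -1;
        }
      }

      public static void main(String[] args) {
        FieldMapCommitSketch s = new FieldMapCommitSketch();
        s.prepareCommit(true);
        s.rollbackCommit();   // counter back to 0, pending file deleted
        s.prepareCommit(true);
        s.finishCommit();     // 1.fnx is now the committed map
        System.out.println(s.lastGlobalFieldMapVersion); // 1
      }
    }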
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Tue Mar 22 11:12:09 2011
@@ -26,16 +26,16 @@ import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
-import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.MergeState;
+import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.MultiBits;
+import org.apache.lucene.util.ReaderUtil;
/**
* The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add},
@@ -75,8 +75,8 @@ final class SegmentMerger {
this.payloadProcessorProvider = payloadProcessorProvider;
directory = dir;
this.codecs = codecs;
- this.fieldInfos = fieldInfos;
segment = name;
+ this.fieldInfos = fieldInfos;
if (merge != null) {
checkAbort = new MergeState.CheckAbort(merge, directory);
} else {
@@ -180,9 +180,8 @@ final class SegmentMerger {
SegmentReader segmentReader = (SegmentReader) reader;
boolean same = true;
FieldInfos segmentFieldInfos = segmentReader.fieldInfos();
- int numFieldInfos = segmentFieldInfos.size();
- for (int j = 0; same && j < numFieldInfos; j++) {
- same = fieldInfos.fieldName(j).equals(segmentFieldInfos.fieldName(j));
+ for (FieldInfo fi : segmentFieldInfos) {
+ same = fieldInfos.fieldName(fi.number).equals(fi.name);
}
if (same) {
matchingSegmentReaders[i] = segmentReader;
@@ -208,9 +207,8 @@ final class SegmentMerger {
if (reader instanceof SegmentReader) {
SegmentReader segmentReader = (SegmentReader) reader;
FieldInfos readerFieldInfos = segmentReader.fieldInfos();
- int numReaderFieldInfos = readerFieldInfos.size();
- for (int j = 0; j < numReaderFieldInfos; j++) {
- fieldInfos.add(readerFieldInfos.fieldInfo(j));
+ for (FieldInfo fi : readerFieldInfos) {
+ fieldInfos.add(fi);
}
} else {
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
@@ -224,13 +222,13 @@ final class SegmentMerger {
}
}
final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, this.codecs);
- fieldInfos.write(directory, segment + ".fnm");
+ fieldInfos.write(directory, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
int docCount = 0;
setMatchingSegmentReaders();
- final FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
+ final FieldsWriter fieldsWriter = new FieldsWriter(directory, segment);
try {
int idx = 0;
@@ -312,7 +310,7 @@ final class SegmentMerger {
// NOTE: it's very important to first assign to doc then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Document doc = reader.document(j);
- fieldsWriter.addDocument(doc);
+ fieldsWriter.addDocument(doc, fieldInfos);
docCount++;
checkAbort.work(300);
}
@@ -339,7 +337,7 @@ final class SegmentMerger {
// NOTE: it's very important to first assign to doc then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Document doc = reader.document(docCount);
- fieldsWriter.addDocument(doc);
+ fieldsWriter.addDocument(doc, fieldInfos);
checkAbort.work(300);
}
}
@@ -579,8 +577,7 @@ final class SegmentMerger {
private void mergeNorms() throws IOException {
IndexOutput output = null;
try {
- for (int i = 0, numFieldInfos = fieldInfos.size(); i < numFieldInfos; i++) {
- final FieldInfo fi = fieldInfos.fieldInfo(i);
+ for (FieldInfo fi : fieldInfos) {
if (fi.isIndexed && !fi.omitNorms) {
if (output == null) {
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
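Throughout SegmentMerger the positional loops over fieldInfos.fieldInfo(i) become for-each loops over the FieldInfo objects themselves, because with globally assigned numbers a segment may use a sparse subset (say 0, 3 and 7) and position i no longer corresponds to field number i. A tiny self-contained illustration of why the for-each form is the safe one (all names here are illustrative, not the Lucene API):

    import java.util.LinkedHashMap;
    import java.util.Map;

    class SparseFieldIteration {
      static class FieldInfo {
        final String name;
        final int number;
        FieldInfo(String name, int number) { this.name = name; this.number = number; }
      }

      public static void main(String[] args) {
        // A segment that only uses global field numbers 0, 3 and 7:
        Map<Integer, FieldInfo> byNumber = new LinkedHashMap<Integer, FieldInfo>();
        byNumber.put(0, new FieldInfo("id", 0));
        byNumber.put(3, new FieldInfo("title", 3));
        byNumber.put(7, new FieldInfo("body", 7));

        // Iterating the infos directly visits exactly the fields that exist;
        // a positional loop for (i = 0; i < size; i++) would look up
        // numbers 0, 1, 2 and miss 3 and 7 entirely.
        for (FieldInfo fi : byNumber.values()) {
          System.out.println(fi.number + " -> " + fi.name);
        }
      }
    }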
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java Tue Mar 22 11:12:09 2011
@@ -22,23 +22,22 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
-
import java.util.List;
import java.util.Map;
import java.util.Set;
-
import java.util.concurrent.atomic.AtomicInteger;
+
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.CloseableThreadLocal;
-import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CloseableThreadLocal;
/**
* @lucene.experimental
@@ -119,8 +118,8 @@ public class SegmentReader extends Index
dir0 = cfsReader;
}
cfsDir = dir0;
-
- fieldInfos = new FieldInfos(cfsDir, IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELD_INFOS_EXTENSION));
+ si.loadFieldInfos(cfsDir, false); // prevent opening the CFS to load fieldInfos
+ fieldInfos = si.getFieldInfos();
this.termsIndexDivisor = termsIndexDivisor;
@@ -598,12 +597,12 @@ public class SegmentReader extends Index
&& (!si.hasDeletions() || this.si.getDelFileName().equals(si.getDelFileName()));
boolean normsUpToDate = true;
- boolean[] fieldNormsChanged = new boolean[core.fieldInfos.size()];
- final int fieldCount = core.fieldInfos.size();
- for (int i = 0; i < fieldCount; i++) {
- if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
+ Set<Integer> fieldNormsChanged = new HashSet<Integer>();
+ for (FieldInfo fi : core.fieldInfos) {
+ int fieldNumber = fi.number;
+ if (!this.si.getNormFileName(fieldNumber).equals(si.getNormFileName(fieldNumber))) {
normsUpToDate = false;
- fieldNormsChanged[i] = true;
+ fieldNormsChanged.add(fieldNumber);
}
}
@@ -659,11 +658,10 @@ public class SegmentReader extends Index
clone.norms = new HashMap<String,Norm>();
// Clone norms
- for (int i = 0; i < fieldNormsChanged.length; i++) {
-
+ for (FieldInfo fi : core.fieldInfos) {
// Clone unchanged norms to the cloned reader
- if (doClone || !fieldNormsChanged[i]) {
- final String curField = core.fieldInfos.fieldInfo(i).name;
+ if (doClone || !fieldNormsChanged.contains(fi.number)) {
+ final String curField = fi.name;
Norm norm = this.norms.get(curField);
if (norm != null)
clone.norms.put(curField, (Norm) norm.clone());
@@ -735,7 +733,7 @@ public class SegmentReader extends Index
}
if (normsDirty) { // re-write norms
- si.initNormGen(core.fieldInfos.size());
+ si.initNormGen();
for (final Norm norm : norms.values()) {
if (norm.dirty) {
norm.reWrite(si);
@@ -880,8 +878,7 @@ public class SegmentReader extends Index
ensureOpen();
Set<String> fieldSet = new HashSet<String>();
- for (int i = 0; i < core.fieldInfos.size(); i++) {
- FieldInfo fi = core.fieldInfos.fieldInfo(i);
+ for (FieldInfo fi : core.fieldInfos) {
if (fieldOption == IndexReader.FieldOption.ALL) {
fieldSet.add(fi.name);
}
@@ -954,8 +951,7 @@ public class SegmentReader extends Index
private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
int maxDoc = maxDoc();
- for (int i = 0; i < core.fieldInfos.size(); i++) {
- FieldInfo fi = core.fieldInfos.fieldInfo(i);
+ for (FieldInfo fi : core.fieldInfos) {
if (norms.containsKey(fi.name)) {
// in case this SegmentReader is being re-opened, we might be able to
// reuse some norm instances and skip loading them here
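The reopen path above likewise swaps the boolean[] fieldNormsChanged (indexed by position) for a Set of changed field numbers. A compact sketch of the clone decision it drives; the normFileRenamed flag stands in for the real comparison of getNormFileName between the old and new SegmentInfo:

    import java.util.HashSet;
    import java.util.Set;

    class NormsCloneSketch {
      public static void main(String[] args) {
        int[] segmentFieldNumbers = { 0, 3, 7 }; // sparse global numbers
        Set<Integer> fieldNormsChanged = new HashSet<Integer>();
        for (int number : segmentFieldNumbers) {
          boolean normFileRenamed = (number == 3); // stand-in for the real check
          if (normFileRenamed) {
            fieldNormsChanged.add(number);
          }
        }
        // Unchanged norms are shared with the cloned reader; changed ones are not:
        for (int number : segmentFieldNumbers) {
          boolean shareWithClone = !fieldNormsChanged.contains(number);
          System.out.println("field " + number + " share norms: " + shareWithClone);
        }
      }
    }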
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java Tue Mar 22 11:12:09 2011
@@ -27,15 +27,13 @@ final class StoredFieldsWriter {
FieldsWriter fieldsWriter;
final DocumentsWriter docWriter;
- final FieldInfos fieldInfos;
int lastDocID;
PerDoc[] docFreeList = new PerDoc[1];
int freeCount;
- public StoredFieldsWriter(DocumentsWriter docWriter, FieldInfos fieldInfos) {
+ public StoredFieldsWriter(DocumentsWriter docWriter) {
this.docWriter = docWriter;
- this.fieldInfos = fieldInfos;
}
public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException {
@@ -62,7 +60,7 @@ final class StoredFieldsWriter {
private synchronized void initFieldsWriter() throws IOException {
if (fieldsWriter == null) {
- fieldsWriter = new FieldsWriter(docWriter.directory, docWriter.getSegment(), fieldInfos);
+ fieldsWriter = new FieldsWriter(docWriter.directory, docWriter.getSegment());
lastDocID = 0;
}
}
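FieldsWriter no longer captures FieldInfos in its constructor; callers such as SegmentMerger now pass the current FieldInfos into each addDocument(doc, fieldInfos) call (see above). Passing the table per call means field numbers registered after the writer was created are still visible to it. A hypothetical reduction of that pattern, with made-up names:

    import java.util.HashMap;
    import java.util.Map;

    class PerCallFieldNumbers {
      // Before: the writer snapshotted the name -> number table at construction.
      // After: callers hand in the (possibly updated) table with each document.
      static void addDocument(String field, Map<String, Integer> fieldNumbers) {
        System.out.println(field + " stored as field #" + fieldNumbers.get(field));
      }

      public static void main(String[] args) {
        Map<String, Integer> fieldNumbers = new HashMap<String, Integer>();
        fieldNumbers.put("id", 0);
        addDocument("id", fieldNumbers);
        fieldNumbers.put("body", 7);       // registered after the writer existed
        addDocument("body", fieldNumbers); // resolves the new number correctly
      }
    }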
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java Tue Mar 22 11:12:09 2011
@@ -32,7 +32,7 @@ final class StoredFieldsWriterPerThread
public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) throws IOException {
this.storedFieldsWriter = storedFieldsWriter;
this.docState = docState;
- localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos);
+ localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null);
}
public void startDocument() {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHash.java?rev=1084134&r1=1084133&r2=1084134&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHash.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHash.java Tue Mar 22 11:12:09 2011
@@ -57,12 +57,6 @@ final class TermsHash extends InvertedDo
}
@Override
- void setFieldInfos(FieldInfos fieldInfos) {
- this.fieldInfos = fieldInfos;
- consumer.setFieldInfos(fieldInfos);
- }
-
- @Override
public void abort() {
consumer.abort();
if (nextTermsHash != null)