You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/03/25 17:24:13 UTC
svn commit: r1085449 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/index/ test-framework/org/apache/lucene/util/
test/org/apache/lucene/index/
Author: simonw
Date: Fri Mar 25 16:24:12 2011
New Revision: 1085449
URL: http://svn.apache.org/viewvc?rev=1085449&view=rev
Log:
LUCENE-2985: Build SegmentCodecs incrementally for consistent codecIDs during indexing
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java Fri Mar 25 16:24:12 2011
@@ -162,7 +162,7 @@ final class DocFieldProcessorPerThread e
// needs to be more "pluggable" such that if I want
// to have a new "thing" my Fields can do, I can
// easily add it
- FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
+ FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
fp = new DocFieldProcessorPerField(this, fi);
@@ -172,11 +172,11 @@ final class DocFieldProcessorPerThread e
if (totalFieldCount >= fieldHash.length/2)
rehash();
- } else
- fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
+ } else {
+ fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
-
+ }
if (thisFieldGen != fp.lastGen) {
// First time we're seeing this field for this doc
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Fri Mar 25 16:24:12 2011
@@ -283,7 +283,6 @@ final class DocumentsWriter {
this.fieldInfos = fieldInfos;
this.bufferedDeletesStream = bufferedDeletesStream;
flushControl = writer.flushControl;
-
consumer = config.getIndexingChain().getChain(this);
this.config = config;
}
@@ -539,10 +538,10 @@ final class DocumentsWriter {
if (infoStream != null) {
message("flush postings as segment " + segment + " numDocs=" + numDocs);
}
-
+
final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos,
numDocs, writer.getConfig().getTermIndexInterval(),
- SegmentCodecs.build(fieldInfos, writer.codecs),
+ fieldInfos.buildSegmentCodecs(true),
pendingDeletes);
// Apply delete-by-docID now (delete-byDocID only
// happens when an exception is hit processing that
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java Fri Mar 25 16:24:12 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
/** @lucene.experimental */
public final class FieldInfo {
+ public static final int UNASSIGNED_CODEC_ID = -1;
public String name;
public boolean isIndexed;
public int number;
@@ -32,7 +33,7 @@ public final class FieldInfo {
public boolean omitTermFreqAndPositions;
public boolean storePayloads; // whether this field stores payloads together with term positions
- private int codecId = -1; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
+ private int codecId = UNASSIGNED_CODEC_ID; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
@@ -57,8 +58,8 @@ public final class FieldInfo {
}
}
- public void setCodecId(int codecId) {
- assert this.codecId == -1 : "CodecId can only be set once.";
+ void setCodecId(int codecId) {
+ assert this.codecId == UNASSIGNED_CODEC_ID : "CodecId can only be set once.";
this.codecId = codecId;
}
@@ -74,6 +75,7 @@ public final class FieldInfo {
return clone;
}
+ // should only be called by FieldInfos#addOrUpdate
void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
if (this.isIndexed != isIndexed) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java Fri Mar 25 16:24:12 2011
@@ -28,6 +28,8 @@ import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Map.Entry;
+import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
+import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -147,8 +149,8 @@ public final class FieldInfos implements
* @return a new {@link FieldInfos} instance with this as the global field
* map
*/
- public FieldInfos newFieldInfos() {
- return new FieldInfos(this);
+ public FieldInfos newFieldInfos(SegmentCodecsBuilder segmentCodecsBuilder) {
+ return new FieldInfos(this, segmentCodecsBuilder);
}
/**
@@ -193,6 +195,7 @@ public final class FieldInfos implements
private final SortedMap<Integer,FieldInfo> byNumber = new TreeMap<Integer,FieldInfo>();
private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
private final FieldNumberBiMap globalFieldNumbers;
+ private final SegmentCodecsBuilder segmentCodecsBuilder;
// First used in 2.9; prior to 2.9 there was no format header
public static final int FORMAT_START = -2;
@@ -215,14 +218,15 @@ public final class FieldInfos implements
/**
* Creates a new {@link FieldInfos} instance with a private
- * {@link FieldNumberBiMap}.
+ * {@link FieldNumberBiMap} and a default {@link SegmentCodecsBuilder}
+ * initialized with {@link CodecProvider#getDefault()}.
* <p>
* Note: this ctor should not be used during indexing use
* {@link FieldInfos#FieldInfos(FieldInfos)} or
* {@link FieldInfos#FieldInfos(FieldNumberBiMap)} instead.
*/
public FieldInfos() {
- this(new FieldNumberBiMap());
+ this(new FieldNumberBiMap(), SegmentCodecsBuilder.create(CodecProvider.getDefault()));
}
/**
@@ -232,7 +236,7 @@ public final class FieldInfos implements
* @see #isReadOnly()
*/
FieldInfos(FieldInfos other) {
- this(other.globalFieldNumbers);
+ this(other.globalFieldNumbers, other.segmentCodecsBuilder);
}
/**
@@ -240,8 +244,9 @@ public final class FieldInfos implements
* If the {@link FieldNumberBiMap} is <code>null</code> this instance will be read-only.
* @see #isReadOnly()
*/
- FieldInfos(FieldNumberBiMap globalFieldNumbers) {
+ FieldInfos(FieldNumberBiMap globalFieldNumbers, SegmentCodecsBuilder segmentCodecsBuilder) {
this.globalFieldNumbers = globalFieldNumbers;
+ this.segmentCodecsBuilder = segmentCodecsBuilder;
}
/**
@@ -255,7 +260,7 @@ public final class FieldInfos implements
* @throws IOException
*/
public FieldInfos(Directory d, String name) throws IOException {
- this((FieldNumberBiMap)null); // use null here to make this FIs Read-Only
+ this((FieldNumberBiMap)null, null); // use null here to make this FIs Read-Only
IndexInput input = d.openInput(name);
try {
read(input, name);
@@ -291,7 +296,7 @@ public final class FieldInfos implements
*/
@Override
synchronized public Object clone() {
- FieldInfos fis = new FieldInfos(globalFieldNumbers);
+ FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
for (FieldInfo fi : this) {
FieldInfo clone = (FieldInfo) (fi).clone();
fis.putInternal(clone);
@@ -310,17 +315,17 @@ public final class FieldInfos implements
}
/**
- * Add fields that are indexed. Whether they have termvectors has to be specified.
+ * Adds or updates fields that are indexed. Whether they have termvectors has to be specified.
*
* @param names The names of the fields
* @param storeTermVectors Whether the fields store term vectors or not
* @param storePositionWithTermVector true if positions should be stored.
* @param storeOffsetWithTermVector true if offsets should be stored
*/
- synchronized public void addIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector,
+ synchronized public void addOrUpdateIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector) {
for (String name : names) {
- add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
+ addOrUpdate(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
}
}
@@ -330,11 +335,11 @@ public final class FieldInfos implements
* @param names The names of the fields
* @param isIndexed Whether the fields are indexed or not
*
- * @see #add(String, boolean)
+ * @see #addOrUpdate(String, boolean)
*/
- synchronized public void add(Collection<String> names, boolean isIndexed) {
+ synchronized public void addOrUpdate(Collection<String> names, boolean isIndexed) {
for (String name : names) {
- add(name, isIndexed);
+ addOrUpdate(name, isIndexed);
}
}
@@ -343,10 +348,10 @@ public final class FieldInfos implements
*
* @param name The name of the Fieldable
* @param isIndexed true if the field is indexed
- * @see #add(String, boolean, boolean, boolean, boolean)
+ * @see #addOrUpdate(String, boolean, boolean, boolean, boolean)
*/
- synchronized public void add(String name, boolean isIndexed) {
- add(name, isIndexed, false, false, false, false);
+ synchronized public void addOrUpdate(String name, boolean isIndexed) {
+ addOrUpdate(name, isIndexed, false, false, false, false);
}
/**
@@ -356,8 +361,8 @@ public final class FieldInfos implements
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
*/
- synchronized public void add(String name, boolean isIndexed, boolean storeTermVector){
- add(name, isIndexed, storeTermVector, false, false, false);
+ synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector){
+ addOrUpdate(name, isIndexed, storeTermVector, false, false, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
@@ -371,10 +376,10 @@ public final class FieldInfos implements
* @param storePositionWithTermVector true if the term vector with positions should be stored
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
*/
- synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
+ synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
- add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+ addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
}
/** If the field is not yet known, adds it. If it is known, checks to make
@@ -389,9 +394,9 @@ public final class FieldInfos implements
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
* @param omitNorms true if the norms for the indexed field should be omitted
*/
- synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
+ synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
- add(name, isIndexed, storeTermVector, storePositionWithTermVector,
+ addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
storeOffsetWithTermVector, omitNorms, false, false);
}
@@ -409,7 +414,7 @@ public final class FieldInfos implements
* @param storePayloads true if payloads should be stored for this field
* @param omitTermFreqAndPositions true if term freqs should be omitted for this field
*/
- synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
+ synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
@@ -422,13 +427,17 @@ public final class FieldInfos implements
if (globalFieldNumbers == null) {
throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
}
- final FieldInfo fi = fieldInfo(name);
+ assert segmentCodecsBuilder != null : "SegmentCodecsBuilder is set to null but FieldInfos is not read-only";
+ FieldInfo fi = fieldInfo(name);
if (fi == null) {
final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
- return addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
} else {
fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
}
+ if (fi.isIndexed && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
+ segmentCodecsBuilder.tryAddAndSet(fi);
+ }
return fi;
}
@@ -515,6 +524,22 @@ public final class FieldInfos implements
}
return false;
}
+
+ /**
+ * Builds the {@link SegmentCodecs} mapping for this {@link FieldInfos} instance.
+ * @param clearBuilder <code>true</code> iff the internal {@link SegmentCodecsBuilder} must be cleared after the build; otherwise <code>false</code>
+ */
+ public SegmentCodecs buildSegmentCodecs(boolean clearBuilder) {
+ if (globalFieldNumbers == null) {
+ throw new IllegalStateException("FieldInfos are read-only no SegmentCodecs available");
+ }
+ assert segmentCodecsBuilder != null;
+ final SegmentCodecs segmentCodecs = segmentCodecsBuilder.build();
+ if (clearBuilder) {
+ segmentCodecsBuilder.clear();
+ }
+ return segmentCodecs;
+ }
public void write(Directory d, String name) throws IOException {
IndexOutput output = d.createOutput(name);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java Fri Mar 25 16:24:12 2011
@@ -38,6 +38,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
+import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException;
@@ -790,7 +791,7 @@ public class IndexWriter implements Clos
// start with previous field numbers, but new FieldInfos
globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
docWriter = new DocumentsWriter(config, directory, this, conf.getIndexingChain(),
- globalFieldNumberMap.newFieldInfos(), bufferedDeletesStream);
+ globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)), bufferedDeletesStream);
docWriter.setInfoStream(infoStream);
// Default deleter (for backwards compatibility) is
@@ -2208,7 +2209,7 @@ public class IndexWriter implements Clos
String mergedName = newSegmentName();
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
mergedName, null, codecs, payloadProcessorProvider,
- globalFieldNumberMap.newFieldInfos());
+ globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
for (IndexReader reader : readers) // add new indexes
merger.add(reader);
@@ -2951,7 +2952,7 @@ public class IndexWriter implements Clos
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
- merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos());
+ merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
// Lock order: IW -> BD
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java Fri Mar 25 16:24:12 2011
@@ -67,6 +67,7 @@ final class PerFieldCodecWrapper extends
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
+ assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
final FieldsConsumer fields = consumers.get(field.getCodecId());
return fields.addField(field);
}
@@ -106,6 +107,7 @@ final class PerFieldCodecWrapper extends
for (FieldInfo fi : fieldInfos) {
if (fi.isIndexed) { // TODO this does not work for non-indexed fields
fields.add(fi.name);
+ assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
Codec codec = segmentCodecs.codecs[fi.getCodecId()];
if (!producers.containsKey(codec)) {
producers.put(codec, codec.fieldsProducer(new SegmentReadState(dir,
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java Fri Mar 25 16:24:12 2011
@@ -38,17 +38,16 @@ import org.apache.lucene.store.IndexOutp
* {@link SegmentWriteState} for each flush and is maintained in the
* corresponding {@link SegmentInfo} until it is committed.
* <p>
- * {@link SegmentCodecs#build(FieldInfos, CodecProvider)} should be used to
- * create a {@link SegmentCodecs} instance during {@link IndexWriter} sessions
- * which creates the ordering of distinct codecs and assigns the
- * {@link FieldInfo#codecId} or in other words, the ord of the codec maintained
- * inside {@link SegmentCodecs}, to the {@link FieldInfo}. This ord is valid
- * only until the current segment is flushed and {@link FieldInfos} for that
- * segment are written including the ord for each field. This ord is later used
- * to get the right codec when the segment is opened in a reader. The
- * {@link Codec} returned from {@link SegmentCodecs#codec()} in turn uses
- * {@link SegmentCodecs} internal structure to select and initialize the right
- * codec for a fields when it is written.
+ * During indexing {@link FieldInfos} uses {@link SegmentCodecsBuilder} to incrementally
+ * build the {@link SegmentCodecs} mapping. Once a segment is flushed
+ * DocumentsWriter creates a {@link SegmentCodecs} instance from
+ * {@link FieldInfos#buildSegmentCodecs(boolean)}. The {@link FieldInfo#codecId}
+ * assigned by {@link SegmentCodecsBuilder} refers to the codec's ordinal
+ * maintained inside {@link SegmentCodecs}. This ord is later used to get the
+ * right codec when the segment is opened in a reader. The {@link Codec} returned
+ * from {@link SegmentCodecs#codec()} in turn uses {@link SegmentCodecs}
+ * internal structure to select and initialize the right codec for a field when
+ * it is written.
* <p>
* Once a flush succeeded the {@link SegmentCodecs} is maintained inside the
* {@link SegmentInfo} for the flushed segment it was created for.
@@ -64,36 +63,19 @@ final class SegmentCodecs implements Clo
* internal structure to map codecs to fields - don't modify this from outside
* of this class!
*/
- Codec[] codecs;
+ final Codec[] codecs;
final CodecProvider provider;
private final Codec codec = new PerFieldCodecWrapper(this);
-
+
+ SegmentCodecs(CodecProvider provider, IndexInput input) throws IOException {
+ this(provider, read(input, provider));
+ }
+
SegmentCodecs(CodecProvider provider, Codec... codecs) {
this.provider = provider;
this.codecs = codecs;
}
- static SegmentCodecs build(FieldInfos infos, CodecProvider provider) {
- final Map<Codec, Integer> codecRegistry = new IdentityHashMap<Codec, Integer>();
- final ArrayList<Codec> codecs = new ArrayList<Codec>();
-
- for (FieldInfo fi : infos) {
- if (fi.isIndexed) {
- final Codec fieldCodec = provider.lookup(provider
- .getFieldCodec(fi.name));
- Integer ord = codecRegistry.get(fieldCodec);
- if (ord == null) {
- ord = Integer.valueOf(codecs.size());
- codecRegistry.put(fieldCodec, ord);
- codecs.add(fieldCodec);
- }
- fi.setCodecId(ord.intValue());
- }
- }
- return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY));
-
- }
-
Codec codec() {
return codec;
}
@@ -105,7 +87,7 @@ final class SegmentCodecs implements Clo
}
}
- void read(IndexInput in) throws IOException {
+ private static Codec[] read(IndexInput in, CodecProvider provider) throws IOException {
final int size = in.readVInt();
final ArrayList<Codec> list = new ArrayList<Codec>();
for (int i = 0; i < size; i++) {
@@ -113,7 +95,7 @@ final class SegmentCodecs implements Clo
final Codec lookup = provider.lookup(codecName);
list.add(i, lookup);
}
- codecs = list.toArray(Codec.EMPTY);
+ return list.toArray(Codec.EMPTY);
}
void files(Directory dir, SegmentInfo info, Set<String> files)
@@ -129,4 +111,58 @@ final class SegmentCodecs implements Clo
public String toString() {
return "SegmentCodecs [codecs=" + Arrays.toString(codecs) + ", provider=" + provider + "]";
}
+
+ /**
+ * Used in {@link FieldInfos} to incrementally build the codec ID mapping for
+ * {@link FieldInfo} instances.
+ * <p>
+ * Note: this class is not thread-safe
+ * </p>
+ * @see FieldInfo#getCodecId()
+ */
+ final static class SegmentCodecsBuilder {
+ private final Map<Codec, Integer> codecRegistry = new IdentityHashMap<Codec, Integer>();
+ private final ArrayList<Codec> codecs = new ArrayList<Codec>();
+ private final CodecProvider provider;
+
+ private SegmentCodecsBuilder(CodecProvider provider) {
+ this.provider = provider;
+ }
+
+ static SegmentCodecsBuilder create(CodecProvider provider) {
+ return new SegmentCodecsBuilder(provider);
+ }
+
+ SegmentCodecsBuilder tryAddAndSet(FieldInfo fi) {
+ if (fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
+ final Codec fieldCodec = provider.lookup(provider
+ .getFieldCodec(fi.name));
+ Integer ord = codecRegistry.get(fieldCodec);
+ if (ord == null) {
+ ord = Integer.valueOf(codecs.size());
+ codecRegistry.put(fieldCodec, ord);
+ codecs.add(fieldCodec);
+ }
+ fi.setCodecId(ord.intValue());
+ }
+ return this;
+ }
+
+ SegmentCodecsBuilder addAll(FieldInfos infos) {
+ for (FieldInfo fieldInfo : infos) {
+ tryAddAndSet(fieldInfo);
+ }
+ return this;
+ }
+
+ SegmentCodecs build() {
+ return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY));
+ }
+
+ SegmentCodecsBuilder clear() {
+ codecRegistry.clear();
+ codecs.clear();
+ return this;
+ }
+ }
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Fri Mar 25 16:24:12 2011
@@ -209,13 +209,12 @@ public final class SegmentInfo {
hasProx = input.readByte() == YES;
// System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
- segmentCodecs = new SegmentCodecs(codecs);
if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
- segmentCodecs.read(input);
+ segmentCodecs = new SegmentCodecs(codecs, input);
} else {
// codec ID on FieldInfo is 0 so it will simply use the first codec available
// TODO what todo if preflex is not available in the provider? register it or fail?
- segmentCodecs.codecs = new Codec[] { codecs.lookup("PreFlex")};
+ segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")});
}
diagnostics = input.readStringStringMap();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Fri Mar 25 16:24:12 2011
@@ -148,7 +148,7 @@ final class SegmentMerger {
boolean storePayloads, boolean omitTFAndPositions)
throws IOException {
for (String field : names) {
- fInfos.add(field, true, storeTermVectors,
+ fInfos.addOrUpdate(field, true, storeTermVectors,
storePositionWithTermVector, storeOffsetWithTermVector, !reader
.hasNorms(field), storePayloads, omitTFAndPositions);
}
@@ -218,10 +218,10 @@ final class SegmentMerger {
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false);
- fieldInfos.add(reader.getFieldNames(FieldOption.UNINDEXED), false);
+ fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false);
}
}
- final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, this.codecs);
+ final SegmentCodecs codecInfo = fieldInfos.buildSegmentCodecs(false);
fieldInfos.write(directory, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
int docCount = 0;
Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Fri Mar 25 16:24:12 2011
@@ -334,7 +334,7 @@ public class _TestUtil {
public static void add(Document doc, FieldInfos fieldInfos) {
List<Fieldable> fields = doc.getFields();
for (Fieldable field : fields) {
- fieldInfos.add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
+ fieldInfos.addOrUpdate(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
}
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java Fri Mar 25 16:24:12 2011
@@ -80,7 +80,7 @@ public class TestCodecs extends LuceneTe
public FieldData(final String name, final FieldInfos fieldInfos, final TermData[] terms, final boolean omitTF, final boolean storePayloads) {
this.omitTF = omitTF;
this.storePayloads = storePayloads;
- fieldInfos.add(name, true);
+ fieldInfos.addOrUpdate(name, true);
fieldInfo = fieldInfos.fieldInfo(name);
fieldInfo.omitTermFreqAndPositions = omitTF;
fieldInfo.storePayloads = storePayloads;
@@ -240,7 +240,7 @@ public class TestCodecs extends LuceneTe
final Directory dir = newDirectory();
FieldInfos clonedFieldInfos = (FieldInfos) fieldInfos.clone();
this.write(fieldInfos, dir, fields, true);
- final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, SegmentCodecs.build(clonedFieldInfos, CodecProvider.getDefault()), clonedFieldInfos.hasVectors(), clonedFieldInfos);
+ final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos.hasVectors(), clonedFieldInfos);
si.setHasProx(false);
final FieldsProducer reader = si.getSegmentCodecs().codec().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 64, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR));
@@ -293,7 +293,7 @@ public class TestCodecs extends LuceneTe
FieldInfos clonedFieldInfos = (FieldInfos) fieldInfos.clone();
this.write(fieldInfos, dir, fields, false);
- final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, SegmentCodecs.build(clonedFieldInfos, CodecProvider.getDefault()), clonedFieldInfos.hasVectors(), clonedFieldInfos);
+ final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos.hasVectors(), clonedFieldInfos);
if (VERBOSE) {
System.out.println("TEST: now read postings");
@@ -442,7 +442,7 @@ public class TestCodecs extends LuceneTe
for(int iter=0;iter<NUM_TEST_ITER;iter++) {
final FieldData field = fields[TestCodecs.random.nextInt(fields.length)];
final TermsEnum termsEnum = termsDict.terms(field.fieldInfo.name).iterator();
-
+ assertTrue(field.fieldInfo.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID);
if (si.getSegmentCodecs().codecs[field.fieldInfo.getCodecId()] instanceof PreFlexCodec) {
// code below expects unicode sort order
continue;
@@ -591,12 +591,13 @@ public class TestCodecs extends LuceneTe
private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable {
final int termIndexInterval = _TestUtil.nextInt(random, 13, 27);
- final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, CodecProvider.getDefault());
+ final SegmentCodecs codecInfo = fieldInfos.buildSegmentCodecs(false);
final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, 10000, termIndexInterval, codecInfo, null);
final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);
Arrays.sort(fields);
for (final FieldData field : fields) {
+ assertTrue(field.fieldInfo.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID);
if (!allowPreFlex && codecInfo.codecs[field.fieldInfo.getCodecId()] instanceof PreFlexCodec) {
// code below expects unicode sort order
continue;
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java Fri Mar 25 16:24:12 2011
@@ -109,33 +109,33 @@ public class TestFieldInfos extends Luce
}
try {
- readOnly.add("bogus", random.nextBoolean());
+ readOnly.addOrUpdate("bogus", random.nextBoolean());
fail("instance should be read only");
} catch (IllegalStateException e) {
// expected
}
try {
- readOnly.add("bogus", random.nextBoolean(), random.nextBoolean());
+ readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean());
fail("instance should be read only");
} catch (IllegalStateException e) {
// expected
}
try {
- readOnly.add("bogus", random.nextBoolean(), random.nextBoolean(),
+ readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
random.nextBoolean(), random.nextBoolean());
fail("instance should be read only");
} catch (IllegalStateException e) {
// expected
}
try {
- readOnly.add("bogus", random.nextBoolean(), random.nextBoolean(),
+ readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
random.nextBoolean(), random.nextBoolean(), random.nextBoolean());
fail("instance should be read only");
} catch (IllegalStateException e) {
// expected
}
try {
- readOnly.add("bogus", random.nextBoolean(), random.nextBoolean(),
+ readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
random.nextBoolean(), random.nextBoolean(), random.nextBoolean(),
random.nextBoolean(), random.nextBoolean());
fail("instance should be read only");
@@ -143,7 +143,7 @@ public class TestFieldInfos extends Luce
// expected
}
try {
- readOnly.add(Arrays.asList("a", "b", "c"), random.nextBoolean());
+ readOnly.addOrUpdate(Arrays.asList("a", "b", "c"), random.nextBoolean());
fail("instance should be read only");
} catch (IllegalStateException e) {
// expected