You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/12/18 23:38:03 UTC
svn commit: r1050728 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/
test/org/apache/lucene/index/
Author: mikemccand
Date: Sat Dec 18 22:38:02 2010
New Revision: 1050728
URL: http://svn.apache.org/viewvc?rev=1050728&view=rev
Log:
LUCENE-1737: set up FieldInfos correctly when we open a pre-4.0 index; pass FieldInfos to SegmentMerger
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1050728&r1=1050727&r2=1050728&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Sat Dec 18 22:38:02 2010
@@ -126,8 +126,6 @@ final class DocumentsWriter {
boolean bufferIsFull; // True when it's time to write segment
private boolean aborting; // True if an abort is pending
- private DocFieldProcessor docFieldProcessor;
-
PrintStream infoStream;
int maxFieldLength = IndexWriterConfig.UNLIMITED_FIELD_LENGTH;
Similarity similarity;
@@ -294,9 +292,6 @@ final class DocumentsWriter {
flushControl = writer.flushControl;
consumer = indexingChain.getChain(this);
- if (consumer instanceof DocFieldProcessor) {
- docFieldProcessor = (DocFieldProcessor) consumer;
- }
}
// Buffer a specific docID for deletion. Currently only
@@ -354,13 +349,6 @@ final class DocumentsWriter {
return fieldInfos;
}
- /** Returns true if any of the fields in the current
- * buffered docs have omitTermFreqAndPositions==false */
- boolean hasProx() {
- return (docFieldProcessor != null) ? fieldInfos.hasProx()
- : true;
- }
-
/** If non-null, various details of indexing are printed
* here. */
synchronized void setInfoStream(PrintStream infoStream) {
@@ -597,7 +585,7 @@ final class DocumentsWriter {
numDocs, writer.getConfig().getTermIndexInterval(),
SegmentCodecs.build(fieldInfos, writer.codecs));
- newSegment = new SegmentInfo(segment, numDocs, directory, false, hasProx(), flushState.segmentCodecs, false);
+ newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false);
Collection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
for (DocumentsWriterThreadState threadState : threadStates) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1050728&r1=1050727&r2=1050728&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java Sat Dec 18 22:38:02 2010
@@ -224,6 +224,13 @@ public final class FieldInfos {
return fi;
}
+ synchronized public FieldInfo add(FieldInfo fi) {
+ return add(fi.name, fi.isIndexed, fi.storeTermVector,
+ fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
+ fi.omitNorms, fi.storePayloads,
+ fi.omitTermFreqAndPositions);
+ }
+
private FieldInfo addInternal(String name, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1050728&r1=1050727&r2=1050728&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java Sat Dec 18 22:38:02 2010
@@ -17,35 +17,36 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.search.Query;
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.store.AlreadyClosedException;
-import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants;
-import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.ThreadInterruptedException;
-import org.apache.lucene.util.Bits;
-
-import java.io.IOException;
-import java.io.Closeable;
-import java.io.PrintStream;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.List;
-import java.util.Collection;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Date;
/**
An <code>IndexWriter</code> creates and maintains an index.
@@ -810,20 +811,41 @@ public class IndexWriter implements Clos
}
}
- private FieldInfos getCurrentFieldInfos() throws IOException {
- final FieldInfos fieldInfos;
- if (segmentInfos.size() > 0) {
- SegmentInfo info = segmentInfos.info(segmentInfos.size()-1);
- Directory cfsDir;
+ private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
+ Directory cfsDir = null;
+ try {
if (info.getUseCompoundFile()) {
cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
} else {
cfsDir = directory;
}
- fieldInfos = new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, "", IndexFileNames.FIELD_INFOS_EXTENSION));
- if (info.getUseCompoundFile()) {
+ return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, "", IndexFileNames.FIELD_INFOS_EXTENSION));
+ } finally {
+ if (info.getUseCompoundFile() && cfsDir != null) {
cfsDir.close();
}
+ }
+ }
+
+ private FieldInfos getCurrentFieldInfos() throws IOException {
+ final FieldInfos fieldInfos;
+ if (segmentInfos.size() > 0) {
+ if (segmentInfos.getFormat() > DefaultSegmentInfosWriter.FORMAT_4_0) {
+ // Pre-4.0 index. In this case we sweep all
+ // segments, merging their FieldInfos:
+ fieldInfos = new FieldInfos();
+ for(SegmentInfo info : segmentInfos) {
+ final FieldInfos segFieldInfos = getFieldInfos(info);
+ final int fieldCount = segFieldInfos.size();
+ for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
+ fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
+ }
+ }
+ } else {
+ // Already a 4.0 index; just seed the FieldInfos
+ // from the last segment
+ fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
+ }
} else {
fieldInfos = new FieldInfos();
}
@@ -2213,7 +2235,8 @@ public class IndexWriter implements Clos
try {
String mergedName = newSegmentName();
SegmentMerger merger = new SegmentMerger(directory, termIndexInterval,
- mergedName, null, codecs, payloadProcessorProvider);
+ mergedName, null, codecs, payloadProcessorProvider,
+ ((FieldInfos) docWriter.getFieldInfos().clone()));
for (IndexReader reader : readers) // add new indexes
merger.add(reader);
@@ -3050,7 +3073,9 @@ public class IndexWriter implements Clos
if (infoStream != null)
message("merging " + merge.segString(directory));
- SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge, codecs, payloadProcessorProvider);
+ SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge,
+ codecs, payloadProcessorProvider,
+ ((FieldInfos) docWriter.getFieldInfos().clone()));
merge.readers = new SegmentReader[numSegments];
merge.readersClone = new SegmentReader[numSegments];
@@ -3093,6 +3118,7 @@ public class IndexWriter implements Clos
if (infoStream != null) {
message("merge segmentCodecs=" + merger.getSegmentCodecs());
+ message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments);
}
assert mergedDocCount == totDocCount;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1050728&r1=1050727&r2=1050728&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Sat Dec 18 22:38:02 2010
@@ -74,6 +74,8 @@ public final class SegmentInfos extends
private CodecProvider codecs;
+ private int format;
+
/**
* If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream.
@@ -88,6 +90,14 @@ public final class SegmentInfos extends
this.codecs = codecs;
}
+ public void setFormat(int format) {
+ this.format = format;
+ }
+
+ public int getFormat() {
+ return format;
+ }
+
public final SegmentInfo info(int i) {
return get(i);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1050728&r1=1050727&r2=1050728&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Sat Dec 18 22:38:02 2010
@@ -56,7 +56,7 @@ final class SegmentMerger {
private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
private List<IndexReader> readers = new ArrayList<IndexReader>();
- private FieldInfos fieldInfos;
+ private final FieldInfos fieldInfos;
private int mergedDocs;
@@ -72,10 +72,11 @@ final class SegmentMerger {
private PayloadProcessorProvider payloadProcessorProvider;
- SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider) {
+ SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
this.payloadProcessorProvider = payloadProcessorProvider;
directory = dir;
this.codecs = codecs;
+ this.fieldInfos = fieldInfos;
segment = name;
if (merge != null) {
checkAbort = new CheckAbort(merge, directory);
@@ -187,6 +188,11 @@ final class SegmentMerger {
private SegmentReader[] matchingSegmentReaders;
private int[] rawDocLengths;
private int[] rawDocLengths2;
+ private int matchedCount;
+
+ public int getMatchedSubReaderCount() {
+ return matchedCount;
+ }
private void setMatchingSegmentReaders() {
// If the i'th reader is a SegmentReader and has
@@ -211,6 +217,7 @@ final class SegmentMerger {
}
if (same) {
matchingSegmentReaders[i] = segmentReader;
+ matchedCount++;
}
}
}
@@ -227,7 +234,6 @@ final class SegmentMerger {
* @throws IOException if there is a low-level IO error
*/
private int mergeFields() throws CorruptIndexException, IOException {
- fieldInfos = new FieldInfos();// merge field names
for (IndexReader reader : readers) {
if (reader instanceof SegmentReader) {
@@ -235,11 +241,7 @@ final class SegmentMerger {
FieldInfos readerFieldInfos = segmentReader.fieldInfos();
int numReaderFieldInfos = readerFieldInfos.size();
for (int j = 0; j < numReaderFieldInfos; j++) {
- FieldInfo fi = readerFieldInfos.fieldInfo(j);
- fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector,
- fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
- !reader.hasNorms(fi.name), fi.storePayloads,
- fi.omitTermFreqAndPositions);
+ fieldInfos.add(readerFieldInfos.fieldInfo(j));
}
} else {
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java?rev=1050728&r1=1050727&r2=1050728&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java Sat Dec 18 22:38:02 2010
@@ -40,7 +40,8 @@ public class DefaultSegmentInfosReader e
IndexInput input = null;
try {
input = openInput(directory, segmentsFileName);
- int format = input.readInt();
+ final int format = input.readInt();
+ infos.setFormat(format);
// check that it is a format we can understand
if (format > DefaultSegmentInfosWriter.FORMAT_MINIMUM)
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1050728&r1=1050727&r2=1050728&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Sat Dec 18 22:38:02 2010
@@ -293,6 +293,9 @@ public class TestBackwardsCompatibility
public void testIndexOldIndex() throws IOException {
for(int i=0;i<oldNames.length;i++) {
+ if (VERBOSE) {
+ System.out.println("TEST: oldName=" + oldNames[i]);
+ }
unzip(getDataFile("index." + oldNames[i] + ".zip"), oldNames[i]);
changeIndexWithAdds(random, oldNames[i]);
rmDir(oldNames[i]);
@@ -386,6 +389,7 @@ public class TestBackwardsCompatibility
Directory dir = newFSDirectory(new File(dirName));
// open writer
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND));
+ writer.setInfoStream(VERBOSE ? System.out : null);
// add 10 docs
for(int i=0;i<10;i++) {
addDoc(writer, 35+i);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java?rev=1050728&r1=1050727&r2=1050728&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java Sat Dec 18 22:38:02 2010
@@ -192,7 +192,7 @@ public class TestDoc extends LuceneTestC
SegmentReader r1 = SegmentReader.get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
SegmentReader r2 = SegmentReader.get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
- SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, merged, null, CodecProvider.getDefault(), null);
+ SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, merged, null, CodecProvider.getDefault(), null, new FieldInfos());
merger.add(r1);
merger.add(r2);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=1050728&r1=1050727&r2=1050728&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java Sat Dec 18 22:38:02 2010
@@ -73,7 +73,7 @@ public class TestSegmentMerger extends L
}
public void testMerge() throws IOException {
- SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, CodecProvider.getDefault(), null);
+ SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, CodecProvider.getDefault(), null, new FieldInfos());
merger.add(reader1);
merger.add(reader2);
int docsMerged = merger.merge();