You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/12/19 11:04:55 UTC
svn commit: r1050784 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/src/java/org/apache/lucene/index/
lucene/src/test/org/apache/lucene/index/ solr/
Author: mikemccand
Date: Sun Dec 19 10:04:54 2010
New Revision: 1050784
URL: http://svn.apache.org/viewvc?rev=1050784&view=rev
Log:
LUCENE-1737: set up FieldInfos correctly when we open a pre-3.1 index; pass FieldInfos to SegmentMerger
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
lucene/dev/branches/branch_3x/solr/ (props changed)
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Sun Dec 19 10:04:54 2010
@@ -126,8 +126,6 @@ final class DocumentsWriter {
boolean bufferIsFull; // True when it's time to write segment
private boolean aborting; // True if an abort is pending
- private DocFieldProcessor docFieldProcessor;
-
PrintStream infoStream;
int maxFieldLength = IndexWriterConfig.UNLIMITED_FIELD_LENGTH;
Similarity similarity;
@@ -292,9 +290,6 @@ final class DocumentsWriter {
flushControl = writer.flushControl;
consumer = indexingChain.getChain(this);
- if (consumer instanceof DocFieldProcessor) {
- docFieldProcessor = (DocFieldProcessor) consumer;
- }
}
// Buffer a specific docID for deletion. Currently only
@@ -352,13 +347,6 @@ final class DocumentsWriter {
return fieldInfos;
}
- /** Returns true if any of the fields in the current
- * buffered docs have omitTermFreqAndPositions==false */
- boolean hasProx() {
- return (docFieldProcessor != null) ? fieldInfos.hasProx()
- : true;
- }
-
/** If non-null, various details of indexing are printed
* here. */
synchronized void setInfoStream(PrintStream infoStream) {
@@ -711,7 +699,7 @@ final class DocumentsWriter {
final SegmentWriteState flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval());
- newSegment = new SegmentInfo(segment, numDocsInRAM, directory, false, true, -1, null, false, hasProx(), false);
+ newSegment = new SegmentInfo(segment, numDocsInRAM, directory, false, true, -1, null, false, fieldInfos.hasProx(), false);
if (!closeDocStore || docStoreOffset != 0) {
newSegment.setDocStoreSegment(docStoreSegment);
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java Sun Dec 19 10:04:54 2010
@@ -243,6 +243,13 @@ final class FieldInfos {
return fi;
}
+ synchronized public FieldInfo add(FieldInfo fi) {
+ return add(fi.name, fi.isIndexed, fi.storeTermVector,
+ fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
+ fi.omitNorms, fi.storePayloads,
+ fi.omitTermFreqAndPositions);
+ }
+
private FieldInfo addInternal(String name, boolean isIndexed,
boolean storeTermVector, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java Sun Dec 19 10:04:54 2010
@@ -17,36 +17,36 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Query;
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.store.AlreadyClosedException;
-import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.Version;
-import java.io.IOException;
-import java.io.Closeable;
-import java.io.PrintStream;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.List;
-import java.util.Collection;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Date;
-
/**
An <code>IndexWriter</code> creates and maintains an index.
@@ -1167,20 +1167,41 @@ public class IndexWriter implements Clos
}
}
- private FieldInfos getCurrentFieldInfos() throws IOException {
- final FieldInfos fieldInfos;
- if (segmentInfos.size() > 0) {
- SegmentInfo info = segmentInfos.info(segmentInfos.size()-1);
- Directory cfsDir;
+ private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
+ Directory cfsDir = null;
+ try {
if (info.getUseCompoundFile()) {
cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, IndexFileNames.COMPOUND_FILE_EXTENSION));
} else {
cfsDir = directory;
}
- fieldInfos = new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
- if (info.getUseCompoundFile()) {
+ return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
+ } finally {
+ if (info.getUseCompoundFile() && cfsDir != null) {
cfsDir.close();
}
+ }
+ }
+
+ private FieldInfos getCurrentFieldInfos() throws IOException {
+ final FieldInfos fieldInfos;
+ if (segmentInfos.size() > 0) {
+ if (segmentInfos.getFormat() > SegmentInfos.FORMAT_DIAGNOSTICS) {
+ // Pre-3.1 index. In this case we sweep all
+ // segments, merging their FieldInfos:
+ fieldInfos = new FieldInfos();
+ for(SegmentInfo info : segmentInfos) {
+ final FieldInfos segFieldInfos = getFieldInfos(info);
+ final int fieldCount = segFieldInfos.size();
+ for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
+ fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
+ }
+ }
+ } else {
+ // Already a 3.1 index; just seed the FieldInfos
+ // from the last segment
+ fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
+ }
} else {
fieldInfos = new FieldInfos();
}
@@ -2897,7 +2918,9 @@ public class IndexWriter implements Clos
try {
String mergedName = newSegmentName();
- SegmentMerger merger = new SegmentMerger(this, mergedName, null);
+ SegmentMerger merger = new SegmentMerger(directory, termIndexInterval,
+ mergedName, null, payloadProcessorProvider,
+ ((FieldInfos) docWriter.getFieldInfos().clone()));
for (IndexReader reader : readers) // add new indexes
merger.add(reader);
@@ -3983,8 +4006,6 @@ public class IndexWriter implements Clos
final String mergedName = merge.info.name;
- SegmentMerger merger = null;
-
int mergedDocCount = 0;
SegmentInfos sourceSegments = merge.segments;
@@ -3993,7 +4014,9 @@ public class IndexWriter implements Clos
if (infoStream != null)
message("merging " + merge.segString(directory));
- merger = new SegmentMerger(this, mergedName, merge);
+ SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge,
+ payloadProcessorProvider,
+ ((FieldInfos) docWriter.getFieldInfos().clone()));
merge.readers = new SegmentReader[numSegments];
merge.readersClone = new SegmentReader[numSegments];
@@ -4074,6 +4097,7 @@ public class IndexWriter implements Clos
// Clear DSS
merge.info.setDocStore(-1, null, false);
+ message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments);
}
// This is where all the work happens:
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Sun Dec 19 10:04:54 2010
@@ -109,12 +109,22 @@ public final class SegmentInfos extends
private Map<String,String> userData = Collections.<String,String>emptyMap(); // Opaque Map<String, String> that user can specify during IndexWriter.commit
+ private int format;
+
/**
* If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream.
*/
private static PrintStream infoStream = null;
+ public void setFormat(int format) {
+ this.format = format;
+ }
+
+ public int getFormat() {
+ return format;
+ }
+
public final SegmentInfo info(int i) {
return get(i);
}
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Sun Dec 19 10:04:54 2010
@@ -52,7 +52,7 @@ final class SegmentMerger {
private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
private List<IndexReader> readers = new ArrayList<IndexReader>();
- private FieldInfos fieldInfos;
+ private final FieldInfos fieldInfos;
private int mergedDocs;
@@ -68,28 +68,12 @@ final class SegmentMerger {
when merging stored fields */
private final static int MAX_RAW_MERGE_DOCS = 4192;
- private final PayloadProcessorProvider pcp;
+ private final PayloadProcessorProvider payloadProcessorProvider;
- /** This ctor used only by test code.
- *
- * @param dir The Directory to merge the other segments into
- * @param name The name of the new segment
- */
- SegmentMerger(Directory dir, String name) {
- pcp = null;
+ SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
+ this.payloadProcessorProvider = payloadProcessorProvider;
directory = dir;
- segment = name;
- checkAbort = new CheckAbort(null, null) {
- @Override
- public void work(double units) throws MergeAbortedException {
- // do nothing
- }
- };
- }
-
- SegmentMerger(IndexWriter writer, String name, MergePolicy.OneMerge merge) {
- pcp = writer.getPayloadProcessorProvider();
- directory = writer.getDirectory();
+ this.fieldInfos = fieldInfos;
segment = name;
if (merge != null) {
checkAbort = new CheckAbort(merge, directory);
@@ -101,7 +85,7 @@ final class SegmentMerger {
}
};
}
- termIndexInterval = writer.getConfig().getTermIndexInterval();
+ this.termIndexInterval = termIndexInterval;
}
public FieldInfos fieldInfos() {
@@ -222,6 +206,11 @@ final class SegmentMerger {
private SegmentReader[] matchingSegmentReaders;
private int[] rawDocLengths;
private int[] rawDocLengths2;
+ private int matchedCount;
+
+ public int getMatchedSubReaderCount() {
+ return matchedCount;
+ }
private void setMatchingSegmentReaders() {
// If the i'th reader is a SegmentReader and has
@@ -246,6 +235,7 @@ final class SegmentMerger {
}
if (same) {
matchingSegmentReaders[i] = segmentReader;
+ matchedCount++;
}
}
}
@@ -261,18 +251,7 @@ final class SegmentMerger {
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
- private final int mergeFields() throws CorruptIndexException, IOException {
-
- if (!mergeDocStores) {
- // When we are not merging by doc stores, their field
- // name -> number mapping are the same. So, we start
- // with the fieldInfos of the last segment in this
- // case, to keep that numbering.
- final SegmentReader sr = (SegmentReader) readers.get(readers.size()-1);
- fieldInfos = (FieldInfos) sr.core.fieldInfos.clone();
- } else {
- fieldInfos = new FieldInfos(); // merge field names
- }
+ private int mergeFields() throws CorruptIndexException, IOException {
for (IndexReader reader : readers) {
if (reader instanceof SegmentReader) {
@@ -280,11 +259,7 @@ final class SegmentMerger {
FieldInfos readerFieldInfos = segmentReader.fieldInfos();
int numReaderFieldInfos = readerFieldInfos.size();
for (int j = 0; j < numReaderFieldInfos; j++) {
- FieldInfo fi = readerFieldInfos.fieldInfo(j);
- fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector,
- fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
- !reader.hasNorms(fi.name), fi.storePayloads,
- fi.omitTermFreqAndPositions);
+ fieldInfos.add(readerFieldInfos.fieldInfo(j));
}
} else {
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
@@ -570,8 +545,8 @@ final class SegmentMerger {
IndexReader reader = readers.get(i);
TermEnum termEnum = reader.terms();
SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
- if (pcp != null) {
- smi.dirPayloadProcessor = pcp.getDirProcessor(reader.directory());
+ if (payloadProcessorProvider != null) {
+ smi.dirPayloadProcessor = payloadProcessorProvider.getDirProcessor(reader.directory());
}
int[] docMap = smi.getDocMap();
if (docMap != null) {
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Sun Dec 19 10:04:54 2010
@@ -300,6 +300,9 @@ public class TestBackwardsCompatibility
public void testIndexOldIndex() throws IOException {
for(int i=0;i<oldNames.length;i++) {
+ if (VERBOSE) {
+ System.out.println("TEST: oldName=" + oldNames[i]);
+ }
unzip(getDataFile("index." + oldNames[i] + ".zip"), oldNames[i]);
changeIndexWithAdds(random, oldNames[i]);
rmDir(oldNames[i]);
@@ -405,6 +408,7 @@ public class TestBackwardsCompatibility
Directory dir = newFSDirectory(new File(dirName));
// open writer
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
+ writer.setInfoStream(VERBOSE ? System.out : null);
// add 10 docs
for(int i=0;i<10;i++) {
addDoc(writer, 35+i);
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java Sun Dec 19 10:04:54 2010
@@ -183,7 +183,7 @@ public class TestDoc extends LuceneTestC
SegmentReader r1 = SegmentReader.get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
SegmentReader r2 = SegmentReader.get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
- SegmentMerger merger = new SegmentMerger(si1.dir, merged);
+ SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, merged, null, null, new FieldInfos());
merger.add(r1);
merger.add(r2);
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java Sun Dec 19 10:04:54 2010
@@ -71,7 +71,7 @@ public class TestSegmentMerger extends L
}
public void testMerge() throws IOException {
- SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment);
+ SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, null, new FieldInfos());
merger.add(reader1);
merger.add(reader2);
int docsMerged = merger.merge();