You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/12/19 11:04:55 UTC

svn commit: r1050784 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/src/java/org/apache/lucene/index/ lucene/src/test/org/apache/lucene/index/ solr/

Author: mikemccand
Date: Sun Dec 19 10:04:54 2010
New Revision: 1050784

URL: http://svn.apache.org/viewvc?rev=1050784&view=rev
Log:
LUCENE-1737: set up FieldInfos correctly when we open a pre-3.1 index; pass FieldInfos to SegmentMerger

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Sun Dec 19 10:04:54 2010
@@ -126,8 +126,6 @@ final class DocumentsWriter {
   boolean bufferIsFull;                   // True when it's time to write segment
   private boolean aborting;               // True if an abort is pending
 
-  private DocFieldProcessor docFieldProcessor;
-
   PrintStream infoStream;
   int maxFieldLength = IndexWriterConfig.UNLIMITED_FIELD_LENGTH;
   Similarity similarity;
@@ -292,9 +290,6 @@ final class DocumentsWriter {
     flushControl = writer.flushControl;
 
     consumer = indexingChain.getChain(this);
-    if (consumer instanceof DocFieldProcessor) {
-      docFieldProcessor = (DocFieldProcessor) consumer;
-    }
   }
 
   // Buffer a specific docID for deletion.  Currently only
@@ -352,13 +347,6 @@ final class DocumentsWriter {
     return fieldInfos;
   }
 
-  /** Returns true if any of the fields in the current
-   *  buffered docs have omitTermFreqAndPositions==false */
-  boolean hasProx() {
-    return (docFieldProcessor != null) ? fieldInfos.hasProx()
-                                       : true;
-  }
-
   /** If non-null, various details of indexing are printed
    *  here. */
   synchronized void setInfoStream(PrintStream infoStream) {
@@ -711,7 +699,7 @@ final class DocumentsWriter {
 
       final SegmentWriteState flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval());
 
-      newSegment = new SegmentInfo(segment, numDocsInRAM, directory, false, true, -1, null, false, hasProx(), false);
+      newSegment = new SegmentInfo(segment, numDocsInRAM, directory, false, true, -1, null, false, fieldInfos.hasProx(), false);
 
       if (!closeDocStore || docStoreOffset != 0) {
         newSegment.setDocStoreSegment(docStoreSegment);

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInfos.java Sun Dec 19 10:04:54 2010
@@ -243,6 +243,13 @@ final class FieldInfos {
     return fi;
   }
 
+  synchronized public FieldInfo add(FieldInfo fi) {
+    return add(fi.name, fi.isIndexed, fi.storeTermVector,
+               fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
+               fi.omitNorms, fi.storePayloads,
+               fi.omitTermFreqAndPositions);
+  }
+
   private FieldInfo addInternal(String name, boolean isIndexed,
                                 boolean storeTermVector, boolean storePositionWithTermVector, 
                                 boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/IndexWriter.java Sun Dec 19 10:04:54 2010
@@ -17,36 +17,36 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.Lock;
 import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.store.AlreadyClosedException;
-import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.apache.lucene.util.Version;
 
-import java.io.IOException;
-import java.io.Closeable;
-import java.io.PrintStream;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.List;
-import java.util.Collection;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Date;
-
 /**
   An <code>IndexWriter</code> creates and maintains an index.
 
@@ -1167,20 +1167,41 @@ public class IndexWriter implements Clos
     }
   }
 
-  private FieldInfos getCurrentFieldInfos() throws IOException {
-    final FieldInfos fieldInfos;
-    if (segmentInfos.size() > 0) {
-      SegmentInfo info = segmentInfos.info(segmentInfos.size()-1);
-      Directory cfsDir;
+  private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
+    Directory cfsDir = null;
+    try {
       if (info.getUseCompoundFile()) {
         cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, IndexFileNames.COMPOUND_FILE_EXTENSION));
       } else {
         cfsDir = directory;
       }
-      fieldInfos = new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
-      if (info.getUseCompoundFile()) {
+      return new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, IndexFileNames.FIELD_INFOS_EXTENSION));
+    } finally {
+      if (info.getUseCompoundFile() && cfsDir != null) {
         cfsDir.close();
       }
+    }
+  }
+
+  private FieldInfos getCurrentFieldInfos() throws IOException {
+    final FieldInfos fieldInfos;
+    if (segmentInfos.size() > 0) {
+      if (segmentInfos.getFormat() > SegmentInfos.FORMAT_DIAGNOSTICS) {
+        // Pre-3.1 index.  In this case we sweep all
+        // segments, merging their FieldInfos:
+        fieldInfos = new FieldInfos();
+        for(SegmentInfo info : segmentInfos) {
+          final FieldInfos segFieldInfos = getFieldInfos(info);
+          final int fieldCount = segFieldInfos.size();
+          for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
+            fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
+          }
+        }
+      } else {
+        // Already a 3.1 index; just seed the FieldInfos
+        // from the last segment
+        fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
+      }
     } else {
       fieldInfos = new FieldInfos();
     }
@@ -2897,7 +2918,9 @@ public class IndexWriter implements Clos
 
     try {
       String mergedName = newSegmentName();
-      SegmentMerger merger = new SegmentMerger(this, mergedName, null);
+      SegmentMerger merger = new SegmentMerger(directory, termIndexInterval,
+                                               mergedName, null, payloadProcessorProvider,
+                                               ((FieldInfos) docWriter.getFieldInfos().clone()));
       
       for (IndexReader reader : readers)      // add new indexes
         merger.add(reader);
@@ -3983,8 +4006,6 @@ public class IndexWriter implements Clos
 
     final String mergedName = merge.info.name;
     
-    SegmentMerger merger = null;
-
     int mergedDocCount = 0;
 
     SegmentInfos sourceSegments = merge.segments;
@@ -3993,7 +4014,9 @@ public class IndexWriter implements Clos
     if (infoStream != null)
       message("merging " + merge.segString(directory));
 
-    merger = new SegmentMerger(this, mergedName, merge);
+    SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge,
+                                             payloadProcessorProvider,
+                                             ((FieldInfos) docWriter.getFieldInfos().clone()));
 
     merge.readers = new SegmentReader[numSegments];
     merge.readersClone = new SegmentReader[numSegments];
@@ -4074,6 +4097,7 @@ public class IndexWriter implements Clos
 
         // Clear DSS
         merge.info.setDocStore(-1, null, false);
+        message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments);
       }
 
       // This is where all the work happens:

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Sun Dec 19 10:04:54 2010
@@ -109,12 +109,22 @@ public final class SegmentInfos extends 
 
   private Map<String,String> userData = Collections.<String,String>emptyMap();       // Opaque Map<String, String> that user can specify during IndexWriter.commit
 
+  private int format;
+
   /**
    * If non-null, information about loading segments_N files
    * will be printed here.  @see #setInfoStream.
    */
   private static PrintStream infoStream = null;
 
+  public void setFormat(int format) {
+    this.format = format;
+  }
+
+  public int getFormat() {
+    return format;
+  }
+
   public final SegmentInfo info(int i) {
     return get(i);
   }

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Sun Dec 19 10:04:54 2010
@@ -52,7 +52,7 @@ final class SegmentMerger {
   private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
 
   private List<IndexReader> readers = new ArrayList<IndexReader>();
-  private FieldInfos fieldInfos;
+  private final FieldInfos fieldInfos;
   
   private int mergedDocs;
 
@@ -68,28 +68,12 @@ final class SegmentMerger {
       when merging stored fields */
   private final static int MAX_RAW_MERGE_DOCS = 4192;
 
-  private final PayloadProcessorProvider pcp;
+  private final PayloadProcessorProvider payloadProcessorProvider;
   
-  /** This ctor used only by test code.
-   * 
-   * @param dir The Directory to merge the other segments into
-   * @param name The name of the new segment
-   */
-  SegmentMerger(Directory dir, String name) {
-  	pcp = null;
+  SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
+    this.payloadProcessorProvider = payloadProcessorProvider;
     directory = dir;
-    segment = name;
-    checkAbort = new CheckAbort(null, null) {
-      @Override
-      public void work(double units) throws MergeAbortedException {
-        // do nothing
-      }
-    };
-  }
-
-  SegmentMerger(IndexWriter writer, String name, MergePolicy.OneMerge merge) {
-  	pcp = writer.getPayloadProcessorProvider();
-    directory = writer.getDirectory();
+    this.fieldInfos = fieldInfos;
     segment = name;
     if (merge != null) {
       checkAbort = new CheckAbort(merge, directory);
@@ -101,7 +85,7 @@ final class SegmentMerger {
         }
       };
     }
-    termIndexInterval = writer.getConfig().getTermIndexInterval();
+    this.termIndexInterval = termIndexInterval;
   }
 
   public FieldInfos fieldInfos() {
@@ -222,6 +206,11 @@ final class SegmentMerger {
   private SegmentReader[] matchingSegmentReaders;
   private int[] rawDocLengths;
   private int[] rawDocLengths2;
+  private int matchedCount;
+
+  public int getMatchedSubReaderCount() {
+    return matchedCount;
+  }
 
   private void setMatchingSegmentReaders() {
     // If the i'th reader is a SegmentReader and has
@@ -246,6 +235,7 @@ final class SegmentMerger {
         }
         if (same) {
           matchingSegmentReaders[i] = segmentReader;
+          matchedCount++;
         }
       }
     }
@@ -261,18 +251,7 @@ final class SegmentMerger {
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
-  private final int mergeFields() throws CorruptIndexException, IOException {
-
-    if (!mergeDocStores) {
-      // When we are not merging by doc stores, their field
-      // name -> number mapping are the same.  So, we start
-      // with the fieldInfos of the last segment in this
-      // case, to keep that numbering.
-      final SegmentReader sr = (SegmentReader) readers.get(readers.size()-1);
-      fieldInfos = (FieldInfos) sr.core.fieldInfos.clone();
-    } else {
-      fieldInfos = new FieldInfos();		  // merge field names
-    }
+  private int mergeFields() throws CorruptIndexException, IOException {
 
     for (IndexReader reader : readers) {
       if (reader instanceof SegmentReader) {
@@ -280,11 +259,7 @@ final class SegmentMerger {
         FieldInfos readerFieldInfos = segmentReader.fieldInfos();
         int numReaderFieldInfos = readerFieldInfos.size();
         for (int j = 0; j < numReaderFieldInfos; j++) {
-          FieldInfo fi = readerFieldInfos.fieldInfo(j);
-          fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector,
-              fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
-              !reader.hasNorms(fi.name), fi.storePayloads,
-              fi.omitTermFreqAndPositions);
+          fieldInfos.add(readerFieldInfos.fieldInfo(j));
         }
       } else {
         addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
@@ -570,8 +545,8 @@ final class SegmentMerger {
       IndexReader reader = readers.get(i);
       TermEnum termEnum = reader.terms();
       SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
-      if (pcp != null) {
-        smi.dirPayloadProcessor = pcp.getDirProcessor(reader.directory());
+      if (payloadProcessorProvider != null) {
+        smi.dirPayloadProcessor = payloadProcessorProvider.getDirProcessor(reader.directory());
       }
       int[] docMap  = smi.getDocMap();
       if (docMap != null) {

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Sun Dec 19 10:04:54 2010
@@ -300,6 +300,9 @@ public class TestBackwardsCompatibility 
 
   public void testIndexOldIndex() throws IOException {
     for(int i=0;i<oldNames.length;i++) {
+      if (VERBOSE) {
+        System.out.println("TEST: oldName=" + oldNames[i]);
+      }
       unzip(getDataFile("index." + oldNames[i] + ".zip"), oldNames[i]);
       changeIndexWithAdds(random, oldNames[i]);
       rmDir(oldNames[i]);
@@ -405,6 +408,7 @@ public class TestBackwardsCompatibility 
     Directory dir = newFSDirectory(new File(dirName));
     // open writer
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
+    writer.setInfoStream(VERBOSE ? System.out : null);
     // add 10 docs
     for(int i=0;i<10;i++) {
       addDoc(writer, 35+i);

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestDoc.java Sun Dec 19 10:04:54 2010
@@ -183,7 +183,7 @@ public class TestDoc extends LuceneTestC
       SegmentReader r1 = SegmentReader.get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
       SegmentReader r2 = SegmentReader.get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
 
-      SegmentMerger merger = new SegmentMerger(si1.dir, merged);
+      SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, merged, null, null, new FieldInfos());
 
       merger.add(r1);
       merger.add(r2);

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=1050784&r1=1050783&r2=1050784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java Sun Dec 19 10:04:54 2010
@@ -71,7 +71,7 @@ public class TestSegmentMerger extends L
   }
   
   public void testMerge() throws IOException {                             
-    SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment);
+    SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, null, new FieldInfos());
     merger.add(reader1);
     merger.add(reader2);
     int docsMerged = merger.merge();